From 6e61c5e2163c8509411143752afc7f3bb37184cb Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Thu, 7 Dec 2023 14:18:06 +0100 Subject: [PATCH 001/570] GH-39096: [Python] Release GIL in `.nbytes` (#39097) ### Rationale for this change The `.nbytes` holds the GIL while computing the data size in C++, which has caused performance issues in Dask because threads were blocking each other See #39096 ### Are these changes tested? I am not sure if additional tests are necessary here. If so, I'm happy to add them but would welcome some pointers. ### Are there any user-facing changes? No * Closes: #39096 Authored-by: Hendrik Makait Signed-off-by: Joris Van den Bossche --- python/pyarrow/array.pxi | 5 +++-- python/pyarrow/table.pxi | 15 +++++++++------ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 9d62bed51f4a4..789e30d3e9b00 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1206,8 +1206,9 @@ cdef class Array(_PandasConvertible): cdef: CResult[int64_t] c_size_res - c_size_res = ReferencedBufferSize(deref(self.ap)) - size = GetResultValue(c_size_res) + with nogil: + c_size_res = ReferencedBufferSize(deref(self.ap)) + size = GetResultValue(c_size_res) return size def get_total_buffer_size(self): diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index f93f5950902c7..2f8d1abd1f085 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -248,8 +248,9 @@ cdef class ChunkedArray(_PandasConvertible): cdef: CResult[int64_t] c_res_buffer - c_res_buffer = ReferencedBufferSize(deref(self.chunked_array)) - size = GetResultValue(c_res_buffer) + with nogil: + c_res_buffer = ReferencedBufferSize(deref(self.chunked_array)) + size = GetResultValue(c_res_buffer) return size def get_total_buffer_size(self): @@ -2386,8 +2387,9 @@ cdef class RecordBatch(_Tabular): cdef: CResult[int64_t] c_res_buffer - c_res_buffer = ReferencedBufferSize(deref(self.batch)) - size = GetResultValue(c_res_buffer) + with nogil: + c_res_buffer = ReferencedBufferSize(deref(self.batch)) + size = GetResultValue(c_res_buffer) return size def get_total_buffer_size(self): @@ -4337,8 +4339,9 @@ cdef class Table(_Tabular): cdef: CResult[int64_t] c_res_buffer - c_res_buffer = ReferencedBufferSize(deref(self.table)) - size = GetResultValue(c_res_buffer) + with nogil: + c_res_buffer = ReferencedBufferSize(deref(self.table)) + size = GetResultValue(c_res_buffer) return size def get_total_buffer_size(self): From 1b634e7d274cf42089d1ab237905a550de36c260 Mon Sep 17 00:00:00 2001 From: David Li Date: Thu, 7 Dec 2023 08:35:34 -0500 Subject: [PATCH 002/570] GH-37061: [Docs][Format] Clarify semantics of GetSchema in FSQL (#38549) ### Rationale for this change Schemas of result sets and bind parameters are ambiguous in a few cases when they interact. ### What changes are included in this PR? Add documentation clarifying the expected behavior. ### Are these changes tested? N/A ### Are there any user-facing changes? No * Closes: #37061 Authored-by: David Li Signed-off-by: David Li --- docs/source/format/FlightSql.rst | 21 +++++++++++++++++++++ format/FlightSql.proto | 10 ++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/docs/source/format/FlightSql.rst b/docs/source/format/FlightSql.rst index f7521c3876493..add044c2d3621 100644 --- a/docs/source/format/FlightSql.rst +++ b/docs/source/format/FlightSql.rst @@ -120,6 +120,23 @@ the ``type`` should be ``ClosePreparedStatement``). 
``ActionCreatePreparedStatementRequest`` Create a new prepared statement for a SQL query. + The response will contain an opaque handle used to identify the + prepared statement. It may also contain two optional schemas: the + Arrow schema of the result set, and the Arrow schema of the bind + parameters (if any). Because the schema of the result set may + depend on the bind parameters, the schemas may not necessarily be + provided here as a result, or if provided, they may not be accurate. + Clients should not assume the schema provided here will be the + schema of any data actually returned by executing the prepared + statement. + + Some statements may have bind parameters without any specific type. + (As a trivial example for SQL, consider ``SELECT ?``.) It is + not currently specified how this should be handled in the bind + parameter schema above. We suggest either using a union type to + enumerate the possible types, or using the NA (null) type as a + wildcard/placeholder. + ``CommandPreparedStatementQuery`` Execute a previously created prepared statement and get the results. @@ -128,6 +145,10 @@ the ``type`` should be ``ClosePreparedStatement``). When used with GetFlightInfo: execute the prepared statement. The prepared statement can be reused after fetching results. + When used with GetSchema: get the expected Arrow schema of the + result set. If the client has bound parameter values with DoPut + previously, the server should take those values into account. + ``CommandPreparedStatementUpdate`` Execute a previously created prepared statement that does not return results. diff --git a/format/FlightSql.proto b/format/FlightSql.proto index 9b5968e5306f0..581cf1f76d57c 100644 --- a/format/FlightSql.proto +++ b/format/FlightSql.proto @@ -1537,11 +1537,14 @@ message ActionCreatePreparedStatementResult { bytes prepared_statement_handle = 1; // If a result set generating query was provided, dataset_schema contains the - // schema of the dataset as described in Schema.fbs::Schema, it is serialized as an IPC message. + // schema of the result set. It should be an IPC-encapsulated Schema, as described in Schema.fbs. + // For some queries, the schema of the results may depend on the schema of the parameters. The server + // should provide its best guess as to the schema at this point. Clients must not assume that this + // schema, if provided, will be accurate. bytes dataset_schema = 2; // If the query provided contained parameters, parameter_schema contains the - // schema of the expected parameters as described in Schema.fbs::Schema, it is serialized as an IPC message. + // schema of the expected parameters. It should be an IPC-encapsulated Schema, as described in Schema.fbs. bytes parameter_schema = 3; } @@ -1743,6 +1746,9 @@ message TicketStatementQuery { * - ARROW:FLIGHT:SQL:IS_CASE_SENSITIVE - "1" indicates if the column is case-sensitive, "0" otherwise. * - ARROW:FLIGHT:SQL:IS_READ_ONLY - "1" indicates if the column is read only, "0" otherwise. * - ARROW:FLIGHT:SQL:IS_SEARCHABLE - "1" indicates if the column is searchable via WHERE clause, "0" otherwise. + * + * If the schema is retrieved after parameter values have been bound with DoPut, then the server should account + * for the parameters when determining the schema. * - DoPut: bind parameter values. All of the bound parameter sets will be executed as a single atomic execution. * - GetFlightInfo: execute the prepared statement instance. 
*/ From f2fb8fffae91c0a909fe219b7948f3dd0e73db83 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Thu, 7 Dec 2023 12:18:30 -0500 Subject: [PATCH 003/570] GH-38928: [R] Fix spelling (#38929) ### Rationale for this change ### What changes are included in this PR? Spelling fixes to r/ ### Are these changes tested? ### Are there any user-facing changes? * Closes: #38928 Authored-by: Josh Soref <2119212+jsoref@users.noreply.github.com> Signed-off-by: Jacob Wujciak-Jens --- r/NEWS.md | 10 +++++----- r/R/arrow-object.R | 2 +- r/R/arrow-package.R | 2 +- r/R/compression.R | 2 +- r/R/config.R | 2 +- r/R/csv.R | 2 +- r/R/dataset.R | 2 +- r/R/dplyr-count.R | 2 +- r/R/dplyr-filter.R | 10 +++++----- r/R/dplyr-funcs-augmented.R | 2 +- r/R/dplyr-funcs-conditional.R | 2 +- r/R/dplyr-funcs-datetime.R | 4 ++-- r/R/dplyr-funcs-string.R | 2 +- r/R/dplyr-funcs-type.R | 4 ++-- r/R/duckdb.R | 2 +- r/R/extension.R | 6 +++--- r/R/feather.R | 4 ++-- r/R/filesystem.R | 2 +- r/R/parquet.R | 2 +- r/R/udf.R | 2 +- r/configure | 2 +- r/man/ExtensionType.Rd | 2 +- r/man/FileSystem.Rd | 2 +- r/man/add_filename.Rd | 2 +- r/man/codec_is_available.Rd | 2 +- r/man/io_thread_count.Rd | 2 +- r/man/new_extension_type.Rd | 2 +- r/man/open_dataset.Rd | 2 +- r/man/read_delim_arrow.Rd | 2 +- r/man/write_feather.Rd | 2 +- r/man/write_parquet.Rd | 2 +- r/src/altrep.cpp | 2 +- r/src/safe-call-into-r.h | 6 +++--- r/tests/testthat/helper-arrow.R | 2 +- r/tests/testthat/helper-skip.R | 4 ++-- r/tests/testthat/test-Array.R | 6 +++--- r/tests/testthat/test-backwards-compatibility.R | 2 +- r/tests/testthat/test-dataset-write.R | 4 ++-- r/tests/testthat/test-dplyr-funcs-datetime.R | 12 ++++++------ r/tests/testthat/test-dplyr-summarize.R | 6 +++--- r/tests/testthat/test-extension.R | 4 ++-- r/tools/nixlibs.R | 4 ++-- r/tools/update-checksums.R | 2 +- r/vignettes/arrow.Rmd | 2 +- r/vignettes/data_objects.Rmd | 2 +- r/vignettes/data_types.Rmd | 2 +- r/vignettes/data_wrangling.Rmd | 2 +- r/vignettes/developers/setup.Rmd | 6 +++--- r/vignettes/fs.Rmd | 4 ++-- r/vignettes/install.Rmd | 6 +++--- r/vignettes/read_write.Rmd | 2 +- 51 files changed, 84 insertions(+), 84 deletions(-) diff --git a/r/NEWS.md b/r/NEWS.md index 8c8852e9c86b9..8515facdff871 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -80,10 +80,10 @@ ## Installation -* MacOS builds now use the same installation pathway as on Linux (@assignUser, +* macOS builds now use the same installation pathway as on Linux (@assignUser, #37684). * A warning message is now issued on package load when running under emulation - on MacOS (i.e., use of x86 installation of R on M1/aarch64; #37777). + on macOS (i.e., use of x86 installation of R on M1/aarch64; #37777). * R scripts that run during configuration and installation are now run using the correct R interpreter (@meztez, #37225). * Failed libarrow builds now return more detailed output (@amoeba, #37727). @@ -416,7 +416,7 @@ As of version 10.0.0, `arrow` requires C++17 to build. This means that: * The `arrow.dev_repo` for nightly builds of the R package and prebuilt libarrow binaries is now . -* Brotli and BZ2 are shipped with MacOS binaries. BZ2 is shipped with Windows binaries. (#13484) +* Brotli and BZ2 are shipped with macOS binaries. BZ2 is shipped with Windows binaries. (#13484) # arrow 8.0.0 @@ -549,7 +549,7 @@ Arrow arrays and tables can be easily concatenated: ## Other improvements and fixes * Many of the vignettes have been reorganized, restructured and expanded to improve their usefulness and clarity. 
-* Code to generate schemas (and individual data type specficiations) are accessible with the `$code()` method on a `schema` or `type`. This allows you to easily get the code needed to create a schema from an object that already has one. +* Code to generate schemas (and individual data type specifications) are accessible with the `$code()` method on a `schema` or `type`. This allows you to easily get the code needed to create a schema from an object that already has one. * Arrow `Duration` type has been mapped to R's `difftime` class. * The `decimal256()` type is supported. The `decimal()` function has been revised to call either `decimal256()` or `decimal128()` based on the value of the `precision` argument. * `write_parquet()` uses a reasonable guess at `chunk_size` instead of always writing a single chunk. This improves the speed of reading and writing large Parquet files. @@ -824,7 +824,7 @@ to send and receive data. See `vignette("flight", package = "arrow")` for an ove * `arrow` now depends on [`cpp11`](https://cpp11.r-lib.org/), which brings more robust UTF-8 handling and faster compilation * The Linux build script now succeeds on older versions of R -* MacOS binary packages now ship with zstandard compression enabled +* macOS binary packages now ship with zstandard compression enabled ## Bug fixes and other enhancements diff --git a/r/R/arrow-object.R b/r/R/arrow-object.R index 5c2cf4691fc9c..b66c39dce957e 100644 --- a/r/R/arrow-object.R +++ b/r/R/arrow-object.R @@ -56,7 +56,7 @@ ArrowObject <- R6Class("ArrowObject", # Return NULL, because keeping this R6 object in scope is not a good idea. # This syntax would allow the rare use that has to actually do this to # do `object <- object$.unsafe_delete()` and reduce the chance that an - # IDE like RStudio will try try to call other methods which will error + # IDE like RStudio will try to call other methods which will error invisible(NULL) } ) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 1f39a50744abc..54e237192e080 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -183,7 +183,7 @@ configure_tzdb <- function() { # Just to be extra safe, let's wrap this in a try(); # we don't want a failed startup message to prevent the package from loading try({ - # On MacOS only, Check if we are running in under emulation, and warn this will not work + # On macOS only, Check if we are running in under emulation, and warn this will not work if (on_rosetta()) { packageStartupMessage( paste( diff --git a/r/R/compression.R b/r/R/compression.R index 8d28fbefd7b3d..3fe00a756987c 100644 --- a/r/R/compression.R +++ b/r/R/compression.R @@ -61,7 +61,7 @@ Codec$create <- function(type = "gzip", compression_level = NA) { #' the Arrow C++ library. This function lets you know which are available for #' use. #' @param type A string, one of "uncompressed", "snappy", "gzip", "brotli", -#' "zstd", "lz4", "lzo", or "bz2", case insensitive. +#' "zstd", "lz4", "lzo", or "bz2", case-insensitive. #' @return Logical: is `type` available? #' @export #' @examples diff --git a/r/R/config.R b/r/R/config.R index bd00afe1be631..941d74e59a90d 100644 --- a/r/R/config.R +++ b/r/R/config.R @@ -40,7 +40,7 @@ io_thread_count <- function() { #' @rdname io_thread_count #' @param num_threads integer: New number of threads for thread pool. At least -#' two threads are reccomended to support all operations in the arrow +#' two threads are recommended to support all operations in the arrow #' package. 
#' @export set_io_thread_count <- function(num_threads) { diff --git a/r/R/csv.R b/r/R/csv.R index a024c4531e748..03540006ca0a2 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -76,7 +76,7 @@ #' #' Note that if you are specifying column names, whether by `schema` or #' `col_names`, and the CSV file has a header row that would otherwise be used -#' to idenfity column names, you'll need to add `skip = 1` to skip that row. +#' to identify column names, you'll need to add `skip = 1` to skip that row. #' #' @param file A character file name or URI, literal data (either a single string or a [raw] vector), #' an Arrow input stream, or a `FileSystem` with path (`SubTreeFileSystem`). diff --git a/r/R/dataset.R b/r/R/dataset.R index 682f6c1481b4f..08189f1b290a2 100644 --- a/r/R/dataset.R +++ b/r/R/dataset.R @@ -46,7 +46,7 @@ #' #' The default behavior in `open_dataset()` is to inspect the file paths #' contained in the provided directory, and if they look like Hive-style, parse -#' them as Hive. If your dataset has Hive-style partioning in the file paths, +#' them as Hive. If your dataset has Hive-style partitioning in the file paths, #' you do not need to provide anything in the `partitioning` argument to #' `open_dataset()` to use them. If you do provide a character vector of #' partition column names, they will be ignored if they match what is detected, diff --git a/r/R/dplyr-count.R b/r/R/dplyr-count.R index ee713030b262e..df585a6cf0111 100644 --- a/r/R/dplyr-count.R +++ b/r/R/dplyr-count.R @@ -56,7 +56,7 @@ tally.arrow_dplyr_query <- function(x, wt = NULL, sort = FALSE, name = NULL) { tally.Dataset <- tally.ArrowTabular <- tally.RecordBatchReader <- tally.arrow_dplyr_query -# we don't want to depend on dplyr, but we refrence these above +# we don't want to depend on dplyr, but we reference these above utils::globalVariables(c("n", "desc")) check_n_name <- function(name, diff --git a/r/R/dplyr-filter.R b/r/R/dplyr-filter.R index c14c67e70168c..d85fa16af2e71 100644 --- a/r/R/dplyr-filter.R +++ b/r/R/dplyr-filter.R @@ -28,20 +28,20 @@ filter.arrow_dplyr_query <- function(.data, ..., .by = NULL, .preserve = FALSE) out$group_by_vars <- by$names } - filts <- expand_across(out, quos(...)) - if (length(filts) == 0) { + expanded_filters <- expand_across(out, quos(...)) + if (length(expanded_filters) == 0) { # Nothing to do return(as_adq(.data)) } # tidy-eval the filter expressions inside an Arrow data_mask - filters <- lapply(filts, arrow_eval, arrow_mask(out)) + filters <- lapply(expanded_filters, arrow_eval, arrow_mask(out)) bad_filters <- map_lgl(filters, ~ inherits(., "try-error")) if (any(bad_filters)) { # This is similar to abandon_ship() except that the filter eval is # vectorized, and we apply filters that _did_ work before abandoning ship # with the rest - expr_labs <- map_chr(filts[bad_filters], format_expr) + expr_labs <- map_chr(expanded_filters[bad_filters], format_expr) if (query_on_dataset(out)) { # Abort. We don't want to auto-collect if this is a Dataset because that # could blow up, too big. 
@@ -71,7 +71,7 @@ filter.arrow_dplyr_query <- function(.data, ..., .by = NULL, .preserve = FALSE) if (by$from_by) { out <- dplyr::ungroup(out) } - return(dplyr::filter(out, !!!filts[bad_filters], .by = {{ .by }})) + return(dplyr::filter(out, !!!expanded_filters[bad_filters], .by = {{ .by }})) } } diff --git a/r/R/dplyr-funcs-augmented.R b/r/R/dplyr-funcs-augmented.R index 116248d2dd92a..dca5ca16fa437 100644 --- a/r/R/dplyr-funcs-augmented.R +++ b/r/R/dplyr-funcs-augmented.R @@ -18,7 +18,7 @@ #' Add the data filename as a column #' #' This function only exists inside `arrow` `dplyr` queries, and it only is -#' valid when quering on a `FileSystemDataset`. +#' valid when querying on a `FileSystemDataset`. #' #' To use filenames generated by this function in subsequent pipeline steps, you #' must either call \code{\link[dplyr:compute]{compute()}} or diff --git a/r/R/dplyr-funcs-conditional.R b/r/R/dplyr-funcs-conditional.R index cd0245eeee182..b9639f00295ce 100644 --- a/r/R/dplyr-funcs-conditional.R +++ b/r/R/dplyr-funcs-conditional.R @@ -55,7 +55,7 @@ register_bindings_conditional <- function() { } if (last_arg && arg$type_id() %in% TYPES_WITH_NAN) { - # store the NA_real_ in the same type as arg to avoid avoid casting + # store the NA_real_ in the same type as arg to avoid casting # smaller float types to larger float types NA_expr <- Expression$scalar(Scalar$create(NA_real_, type = arg$type())) Expression$create("if_else", Expression$create("is_nan", arg), NA_expr, arg) diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R index 5b6e16d376554..440210afd630c 100644 --- a/r/R/dplyr-funcs-datetime.R +++ b/r/R/dplyr-funcs-datetime.R @@ -459,7 +459,7 @@ register_bindings_datetime_timezone <- function() { roll_dst[1], "error" = 0L, "boundary" = 2L, - arrow_not_supported("`roll_dst` value must be 'error' or 'boundary' for non-existent times; other values") + arrow_not_supported("`roll_dst` value must be 'error' or 'boundary' for nonexistent times; other values") ) ambiguous <- switch( @@ -467,7 +467,7 @@ register_bindings_datetime_timezone <- function() { "error" = 0L, "pre" = 1L, "post" = 2L, - arrow_not_supported("`roll_dst` value must be 'error', 'pre', or 'post' for non-existent times") + arrow_not_supported("`roll_dst` value must be 'error', 'pre', or 'post' for nonexistent times") ) if (identical(tzone, "")) { diff --git a/r/R/dplyr-funcs-string.R b/r/R/dplyr-funcs-string.R index 3cd8f94476e5e..9f3220e557f08 100644 --- a/r/R/dplyr-funcs-string.R +++ b/r/R/dplyr-funcs-string.R @@ -516,7 +516,7 @@ register_bindings_string_other <- function() { msg = "`stop` must be length 1 - other lengths are not supported in Arrow" ) - # substr treats values as if they're on a continous number line, so values + # substr treats values as if they're on a continuous number line, so values # 0 are effectively blank characters - set `start` to 1 here so Arrow mimics # this behavior if (start <= 0) { diff --git a/r/R/dplyr-funcs-type.R b/r/R/dplyr-funcs-type.R index 0bd340d4be2dd..f244682737cb4 100644 --- a/r/R/dplyr-funcs-type.R +++ b/r/R/dplyr-funcs-type.R @@ -158,8 +158,8 @@ register_bindings_type_cast <- function() { if (identical(fix.empty.names, TRUE)) { names(args) <- make.names(names(args), unique = TRUE) } else { - name_emtpy <- names(args) == "" - names(args)[!name_emtpy] <- make.names(names(args)[!name_emtpy], unique = TRUE) + name_empty <- names(args) == "" + names(args)[!name_empty] <- make.names(names(args)[!name_empty], unique = TRUE) } } diff --git a/r/R/duckdb.R b/r/R/duckdb.R index 
bf3a57daf2f1e..9632e9bad1984 100644 --- a/r/R/duckdb.R +++ b/r/R/duckdb.R @@ -89,7 +89,7 @@ arrow_duck_connection <- function() { # but if we don't explicitly run dbDisconnect() the user gets a warning # that they may not expect (since they did not open a duckdb connection). # This bit of code will run when the package namespace is cleaned up (i.e., - # at exit). This is more reliable than .onUnload() or .onDetatch(), which + # at exit). This is more reliable than .onUnload() or .onDetach(), which # don't necessarily run on exit. reg.finalizer(arrow_duck_finalizer, function(...) { con <- getOption("arrow_duck_con") diff --git a/r/R/extension.R b/r/R/extension.R index 4419c8ba01642..59a02121fd18c 100644 --- a/r/R/extension.R +++ b/r/R/extension.R @@ -83,7 +83,7 @@ ExtensionArray$create <- function(x, type) { #' - `$WrapArray(array)`: Wraps a storage [Array] into an [ExtensionArray] #' with this extension type. #' -#' In addition, subclasses may override the following methos to customize +#' In addition, subclasses may override the following methods to customize #' the behaviour of extension classes. #' #' - `$deserialize_instance()`: This method is called when a new [ExtensionType] @@ -184,7 +184,7 @@ ExtensionType <- R6Class("ExtensionType", }, ToString = function() { # metadata is probably valid UTF-8 (e.g., JSON), but might not be - # and it's confusing to error when printing the object. This herustic + # and it's confusing to error when printing the object. This heuristic # isn't perfect (but subclasses should override this method anyway) metadata_raw <- self$extension_metadata() @@ -286,7 +286,7 @@ ExtensionType$create <- function(storage_type, #' "dot" syntax (i.e., "some_package.some_type"). The namespace "arrow" #' is reserved for extension types defined by the Apache Arrow libraries. #' @param extension_metadata A [raw()] or [character()] vector containing the -#' serialized version of the type. Chatacter vectors must be length 1 and +#' serialized version of the type. Character vectors must be length 1 and #' are converted to UTF-8 before converting to [raw()]. #' @param type_class An [R6::R6Class] whose `$new()` class method will be #' used to construct a new instance of the type. diff --git a/r/R/feather.R b/r/R/feather.R index 3e390018c825f..474fc6118e44f 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -24,7 +24,7 @@ #' a legacy version available starting in 2016, and the Version 2 (V2), #' which is the Apache Arrow IPC file format. #' The default version is V2. -#' V1 files are distinct from Arrow IPC files and lack many feathures, +#' V1 files are distinct from Arrow IPC files and lack many features, #' such as the ability to store all Arrow data tyeps, and compression support. #' [write_ipc_file()] can only write V2 files. #' @@ -91,7 +91,7 @@ write_feather <- function(x, } } if (is.null(compression_level)) { - # Use -1 as sentinal for "default" + # Use -1 as sentinel for "default" compression_level <- -1L } compression_level <- as.integer(compression_level) diff --git a/r/R/filesystem.R b/r/R/filesystem.R index e0f370ad601b3..c6f92cba1932c 100644 --- a/r/R/filesystem.R +++ b/r/R/filesystem.R @@ -156,7 +156,7 @@ FileSelector$create <- function(base_dir, allow_not_found = FALSE, recursive = F #' buckets if `$CreateDir()` is called on the bucket level (default `FALSE`). #' - `allow_bucket_deletion`: logical, if TRUE, the filesystem will delete #' buckets if`$DeleteDir()` is called on the bucket level (default `FALSE`). 
-#' - `request_timeout`: Socket read time on Windows and MacOS in seconds. If +#' - `request_timeout`: Socket read time on Windows and macOS in seconds. If #' negative, the AWS SDK default (typically 3 seconds). #' - `connect_timeout`: Socket connection timeout in seconds. If negative, AWS #' SDK default is used (typically 1 second). diff --git a/r/R/parquet.R b/r/R/parquet.R index 74f51767a29c4..d92e913cb5db3 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -128,7 +128,7 @@ read_parquet <- function(file, #' - A named vector, to specify the value for the named columns, the default #' value for the setting is used when not supplied #' -#' The `compression` argument can be any of the following (case insensitive): +#' The `compression` argument can be any of the following (case-insensitive): #' "uncompressed", "snappy", "gzip", "brotli", "zstd", "lz4", "lzo" or "bz2". #' Only "uncompressed" is guaranteed to be available, but "snappy" and "gzip" #' are almost always included. See [codec_is_available()]. diff --git a/r/R/udf.R b/r/R/udf.R index fe08f02812fd9..922095cceba6a 100644 --- a/r/R/udf.R +++ b/r/R/udf.R @@ -154,7 +154,7 @@ arrow_scalar_function <- function(fun, in_type, out_type, auto_convert = FALSE) sprintf( paste0( "Expected `fun` to accept %d argument(s)\n", - "but found a function that acccepts %d argument(s)\n", + "but found a function that accepts %d argument(s)\n", "Did you forget to include `context` as the first argument?" ), expected_n_args, diff --git a/r/configure b/r/configure index 96238f0b9a37e..029fc004dfc4c 100755 --- a/r/configure +++ b/r/configure @@ -62,7 +62,7 @@ PKG_CONFIG_NAME="arrow" PKG_BREW_NAME="apache-arrow" PKG_TEST_HEADER="" -# Some env vars that control the build (all logical, case insensitive) +# Some env vars that control the build (all logical, case-insensitive) # Development mode, also increases verbosity in the bundled build ARROW_R_DEV=`echo $ARROW_R_DEV | tr '[:upper:]' '[:lower:]'` # The bundled build compiles arrow C++ from source; FORCE ensures we don't pick up diff --git a/r/man/ExtensionType.Rd b/r/man/ExtensionType.Rd index 032a4a76bf80b..aef4d01d7539e 100644 --- a/r/man/ExtensionType.Rd +++ b/r/man/ExtensionType.Rd @@ -26,7 +26,7 @@ extension metadata as a UTF-8 encoded string. with this extension type. } -In addition, subclasses may override the following methos to customize +In addition, subclasses may override the following methods to customize the behaviour of extension classes. \itemize{ \item \verb{$deserialize_instance()}: This method is called when a new \link{ExtensionType} diff --git a/r/man/FileSystem.Rd b/r/man/FileSystem.Rd index b71d95f423ee3..dbf89ef1387ac 100644 --- a/r/man/FileSystem.Rd +++ b/r/man/FileSystem.Rd @@ -57,7 +57,7 @@ in the background, without blocking (default \code{TRUE}) buckets if \verb{$CreateDir()} is called on the bucket level (default \code{FALSE}). \item \code{allow_bucket_deletion}: logical, if TRUE, the filesystem will delete buckets if\verb{$DeleteDir()} is called on the bucket level (default \code{FALSE}). -\item \code{request_timeout}: Socket read time on Windows and MacOS in seconds. If +\item \code{request_timeout}: Socket read time on Windows and macOS in seconds. If negative, the AWS SDK default (typically 3 seconds). \item \code{connect_timeout}: Socket connection timeout in seconds. If negative, AWS SDK default is used (typically 1 second). 
diff --git a/r/man/add_filename.Rd b/r/man/add_filename.Rd index 93718435a2042..1fe10ea4f8f26 100644 --- a/r/man/add_filename.Rd +++ b/r/man/add_filename.Rd @@ -12,7 +12,7 @@ augmented column. } \description{ This function only exists inside \code{arrow} \code{dplyr} queries, and it only is -valid when quering on a \code{FileSystemDataset}. +valid when querying on a \code{FileSystemDataset}. } \details{ To use filenames generated by this function in subsequent pipeline steps, you diff --git a/r/man/codec_is_available.Rd b/r/man/codec_is_available.Rd index 5cda813f41673..e79b5724b8b17 100644 --- a/r/man/codec_is_available.Rd +++ b/r/man/codec_is_available.Rd @@ -8,7 +8,7 @@ codec_is_available(type) } \arguments{ \item{type}{A string, one of "uncompressed", "snappy", "gzip", "brotli", -"zstd", "lz4", "lzo", or "bz2", case insensitive.} +"zstd", "lz4", "lzo", or "bz2", case-insensitive.} } \value{ Logical: is \code{type} available? diff --git a/r/man/io_thread_count.Rd b/r/man/io_thread_count.Rd index 6cd44e1f6ea94..ae9297bb57761 100644 --- a/r/man/io_thread_count.Rd +++ b/r/man/io_thread_count.Rd @@ -11,7 +11,7 @@ set_io_thread_count(num_threads) } \arguments{ \item{num_threads}{integer: New number of threads for thread pool. At least -two threads are reccomended to support all operations in the arrow +two threads are recommended to support all operations in the arrow package.} } \description{ diff --git a/r/man/new_extension_type.Rd b/r/man/new_extension_type.Rd index 6d0f27c321991..a7307e538b940 100644 --- a/r/man/new_extension_type.Rd +++ b/r/man/new_extension_type.Rd @@ -32,7 +32,7 @@ array.} is reserved for extension types defined by the Apache Arrow libraries.} \item{extension_metadata}{A \code{\link[=raw]{raw()}} or \code{\link[=character]{character()}} vector containing the -serialized version of the type. Chatacter vectors must be length 1 and +serialized version of the type. Character vectors must be length 1 and are converted to UTF-8 before converting to \code{\link[=raw]{raw()}}.} \item{type_class}{An \link[R6:R6Class]{R6::R6Class} whose \verb{$new()} class method will be diff --git a/r/man/open_dataset.Rd b/r/man/open_dataset.Rd index 7c3d32289f73e..7028f38467303 100644 --- a/r/man/open_dataset.Rd +++ b/r/man/open_dataset.Rd @@ -142,7 +142,7 @@ what names to give the virtual columns that come from the path segments. The default behavior in \code{open_dataset()} is to inspect the file paths contained in the provided directory, and if they look like Hive-style, parse -them as Hive. If your dataset has Hive-style partioning in the file paths, +them as Hive. If your dataset has Hive-style partitioning in the file paths, you do not need to provide anything in the \code{partitioning} argument to \code{open_dataset()} to use them. If you do provide a character vector of partition column names, they will be ignored if they match what is detected, diff --git a/r/man/read_delim_arrow.Rd b/r/man/read_delim_arrow.Rd index 999f2d265b7fd..b56d445c9e2e3 100644 --- a/r/man/read_delim_arrow.Rd +++ b/r/man/read_delim_arrow.Rd @@ -230,7 +230,7 @@ be dropped. Note that if you are specifying column names, whether by \code{schema} or \code{col_names}, and the CSV file has a header row that would otherwise be used -to idenfity column names, you'll need to add \code{skip = 1} to skip that row. +to identify column names, you'll need to add \code{skip = 1} to skip that row. 
} \examples{ diff --git a/r/man/write_feather.Rd b/r/man/write_feather.Rd index 78cf60b67477f..0d3a7da3b90b4 100644 --- a/r/man/write_feather.Rd +++ b/r/man/write_feather.Rd @@ -59,7 +59,7 @@ and to make sharing data across data analysis languages easy. a legacy version available starting in 2016, and the Version 2 (V2), which is the Apache Arrow IPC file format. The default version is V2. -V1 files are distinct from Arrow IPC files and lack many feathures, +V1 files are distinct from Arrow IPC files and lack many features, such as the ability to store all Arrow data tyeps, and compression support. \code{\link[=write_ipc_file]{write_ipc_file()}} can only write V2 files. } diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd index af976b1aabf81..480abb12fcf4a 100644 --- a/r/man/write_parquet.Rd +++ b/r/man/write_parquet.Rd @@ -86,7 +86,7 @@ value for each column, in positional order value for the setting is used when not supplied } -The \code{compression} argument can be any of the following (case insensitive): +The \code{compression} argument can be any of the following (case-insensitive): "uncompressed", "snappy", "gzip", "brotli", "zstd", "lz4", "lzo" or "bz2". Only "uncompressed" is guaranteed to be available, but "snappy" and "gzip" are almost always included. See \code{\link[=codec_is_available]{codec_is_available()}}. diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index 9bacf07d1840e..9745393d01bbc 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -747,7 +747,7 @@ struct AltrepVectorString : public AltrepVectorBase> { // Helper class to convert to R strings. We declare one of these for the // class to avoid having to stack-allocate one for every STRING_ELT call. // This class does not own a reference to any arrays: it is the caller's - // responsibility to ensure the Array lifetime exeeds that of the viewer. + // responsibility to ensure the Array lifetime exceeds that of the viewer. struct RStringViewer { RStringViewer() : strip_out_nuls_(false), nul_was_stripped_(false) {} diff --git a/r/src/safe-call-into-r.h b/r/src/safe-call-into-r.h index 319d46d11f0d6..0ffd1d16dca01 100644 --- a/r/src/safe-call-into-r.h +++ b/r/src/safe-call-into-r.h @@ -141,15 +141,15 @@ class MainRThread { MainRThread() : initialized_(false), executor_(nullptr), stop_source_(nullptr) {} }; -// This object is used to ensure that signal hanlders are registered when +// This object is used to ensure that signal handlers are registered when // RunWithCapturedR launches its background thread to call Arrow and is // cleaned up however this exits. Note that the lifecycle of the StopSource, // which is registered at package load, is not necessarily tied to the // lifecycle of the signal handlers. The general approach is to register // the signal handlers only when we are evaluating code outside the R thread // (when we are evaluating code *on* the R thread, R's signal handlers are -// sufficient and will signal an interupt condition that will propagate -// via a cpp11::unwind_excpetion). +// sufficient and will signal an interrupt condition that will propagate +// via a cpp11::unwind_exception). 
class WithSignalHandlerContext { public: WithSignalHandlerContext() : signal_handler_registered_(false) { diff --git a/r/tests/testthat/helper-arrow.R b/r/tests/testthat/helper-arrow.R index 8d39f7252ee21..e277c645d456e 100644 --- a/r/tests/testthat/helper-arrow.R +++ b/r/tests/testthat/helper-arrow.R @@ -37,7 +37,7 @@ with_language <- function(lang, expr) { skip_on_cran() old <- Sys.getenv("LANGUAGE") # Check what this message is before changing languages; this will - # trigger caching the transations if the OS does that (some do). + # trigger caching the translations if the OS does that (some do). # If the OS does cache, then we can't test changing languages safely. before <- i18ize_error_messages() Sys.setenv(LANGUAGE = lang) diff --git a/r/tests/testthat/helper-skip.R b/r/tests/testthat/helper-skip.R index 3d68dac5af69b..bd29080848184 100644 --- a/r/tests/testthat/helper-skip.R +++ b/r/tests/testthat/helper-skip.R @@ -38,11 +38,11 @@ skip_if_not_available <- function(feature) { skip_on_linux_devel() } - # curl/ssl on MacOS is too old to support S3 filesystems without + # curl/ssl on macOS is too old to support S3 filesystems without # crashing when the process exits. if (feature == "s3") { if (on_macos_10_13_or_lower()) { - skip("curl/ssl runtime on MacOS 10.13 is too old") + skip("curl/ssl runtime on macOS 10.13 is too old") } } diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index b29c1f4e09dde..bb005605de318 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -371,19 +371,19 @@ test_that("support for NaN (ARROW-3615)", { expect_equal(y$null_count, 1L) }) -test_that("is.nan() evalutes to FALSE on NA (for consistency with base R)", { +test_that("is.nan() evaluates to FALSE on NA (for consistency with base R)", { x <- c(1.0, NA, NaN, -1.0) compare_expression(is.nan(.input), x) }) -test_that("is.nan() evalutes to FALSE on non-floats (for consistency with base R)", { +test_that("is.nan() evaluates to FALSE on non-floats (for consistency with base R)", { x <- c(1L, 2L, 3L) y <- c("foo", "bar") compare_expression(is.nan(.input), x) compare_expression(is.nan(.input), y) }) -test_that("is.na() evalutes to TRUE on NaN (for consistency with base R)", { +test_that("is.na() evaluates to TRUE on NaN (for consistency with base R)", { x <- c(1, NA, NaN, -1) compare_expression(is.na(.input), x) }) diff --git a/r/tests/testthat/test-backwards-compatibility.R b/r/tests/testthat/test-backwards-compatibility.R index 8210bd2e78fd8..5f804b02dcee7 100644 --- a/r/tests/testthat/test-backwards-compatibility.R +++ b/r/tests/testthat/test-backwards-compatibility.R @@ -22,7 +22,7 @@ # To write a new version of a test file for an old version, use docker(-compose) # to setup a linux distribution and use RStudio's public package manager binary # repo to install the old version. The following commands should be run at the -# root of the arrow repo directory and might need slight adjusments. +# root of the arrow repo directory and might need slight adjustments. 
# R_ORG=rstudio R_IMAGE=r-base R_TAG=4.0-focal docker-compose build --no-cache r # R_ORG=rstudio R_IMAGE=r-base R_TAG=4.0-focal docker-compose run r /bin/bash # R diff --git a/r/tests/testthat/test-dataset-write.R b/r/tests/testthat/test-dataset-write.R index 28ff308747584..9f69380c55b3b 100644 --- a/r/tests/testthat/test-dataset-write.R +++ b/r/tests/testthat/test-dataset-write.R @@ -139,7 +139,7 @@ test_that("Writing a dataset: Parquet->Parquet (default)", { ) }) -test_that("Writing a dataset: `basename_template` default behavier", { +test_that("Writing a dataset: `basename_template` default behavior", { ds <- open_dataset(csv_dir, partitioning = "part", format = "csv") dst_dir <- make_temp_dir() @@ -840,7 +840,7 @@ test_that("Writing a dataset to text files with wrapper functions.", { expect_equal(new_ds %>% collect(), df) }) -test_that("Writing a flat file dataset: `basename_template` default behavier", { +test_that("Writing a flat file dataset: `basename_template` default behavior", { ds <- open_dataset(csv_dir, partitioning = "part", format = "csv") dst_dir <- make_temp_dir() diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index e707a194a3626..4d3226798d3ff 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -1550,7 +1550,7 @@ test_that("as.difftime()", { ) # only integer (or integer-like) -> duration conversion supported in Arrow. - # double -> duration not supported. we're not testing the content of the + # double -> duration not supported. We aren't testing the content of the # error message as it is being generated in the C++ code and it might change, # but we want to make sure that this error is raised in our binding implementation expect_error( @@ -1961,7 +1961,7 @@ test_that("`as.Date()` and `as_date()`", { # `as.Date()` ignores the `tzone` attribute and uses the value of the `tz` arg # to `as.Date()` # `as_date()` does the opposite: uses the tzone attribute of the POSIXct object - # passsed if`tz` is NULL + # passed if`tz` is NULL compare_dplyr_binding( .input %>% transmute( @@ -2831,7 +2831,7 @@ test_that("parse_date_time with truncated formats", { }) test_that("parse_date_time with `locale != NULL` not supported", { - # parse_date_time currently doesn't take locale paramete which will be + # parse_date_time currently doesn't take locale parameter which will be # addressed in https://issues.apache.org/jira/browse/ARROW-17147 skip_if_not_available("re2") @@ -3038,7 +3038,7 @@ test_that("build_formats() and build_format_from_order()", { # an "easy" date to avoid conflating tests of different things (i.e., it's # UTC time, and not one of the edge cases on or extremely close to the -# rounding boundaty) +# rounding boundary) easy_date <- as.POSIXct("2022-10-11 12:00:00", tz = "UTC") easy_df <- tibble::tibble(datetime = easy_date) @@ -3703,7 +3703,7 @@ test_that("with_tz() and force_tz() works", { roll_dst = "post") ) %>% collect(), - "roll_dst` value must be 'error' or 'boundary' for non-existent times" + "roll_dst` value must be 'error' or 'boundary' for nonexistent times" ) expect_warning( @@ -3716,7 +3716,7 @@ test_that("with_tz() and force_tz() works", { ) ) %>% collect(), - "`roll_dst` value must be 'error', 'pre', or 'post' for non-existent times" + "`roll_dst` value must be 'error', 'pre', or 'post' for nonexistent times" ) # Raise error when the timezone falls into the DST-break diff --git a/r/tests/testthat/test-dplyr-summarize.R 
b/r/tests/testthat/test-dplyr-summarize.R index d39c800f3ff0c..b2b2a9e54695d 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -355,7 +355,7 @@ test_that("Functions that take ... but we only accept a single arg", { test_that("median()", { # When medians are integer-valued, stats::median() sometimes returns output of - # type integer, whereas whereas the Arrow approx_median kernels always return + # type integer, whereas the Arrow approx_median kernels always return # output of type float64. The calls to median(int, ...) in the tests below # are enclosed in as.double() to work around this known difference. @@ -434,7 +434,7 @@ test_that("quantile()", { # returned by Arrow. # When quantiles are integer-valued, stats::quantile() sometimes returns - # output of type integer, whereas whereas the Arrow tdigest kernels always + # output of type integer, whereas the Arrow tdigest kernels always # return output of type float64. The calls to quantile(int, ...) in the tests # below are enclosed in as.double() to work around this known difference. @@ -841,7 +841,7 @@ test_that("Expressions on aggregations", { ) ) - # Check aggregates on aggeregates with more complex calls + # Check aggregates on aggregates with more complex calls expect_warning( record_batch(tbl) %>% summarise(any(any(!lgl))), paste( diff --git a/r/tests/testthat/test-extension.R b/r/tests/testthat/test-extension.R index 55a1f8d21eedb..8b3d7d8aaa902 100644 --- a/r/tests/testthat/test-extension.R +++ b/r/tests/testthat/test-extension.R @@ -256,7 +256,7 @@ test_that("RecordBatch can roundtrip extension types", { ) # check both column orders, since column order should stay in the same - # order whether the colunns are are extension types or not + # order whether the columns are extension types or not mixed_record_batch2 <- record_batch( normal = normal_vctr, custom = custom_array @@ -296,7 +296,7 @@ test_that("Table can roundtrip extension types", { ) # check both column orders, since column order should stay in the same - # order whether the colunns are are extension types or not + # order whether the columns are extension types or not mixed_table2 <- arrow_table( normal = normal_vctr, custom = custom_array diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index f4ae7312d3757..1794acee70d22 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-#### Fuctions #### check end of file for main logic +#### Functions #### check end of file for main logic env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value) # Log messages in the style of the configure script @@ -896,7 +896,7 @@ download_libarrow_ok <- download_ok && !env_is("LIBARROW_DOWNLOAD", "false") thirdparty_dependency_dir <- Sys.getenv("ARROW_THIRDPARTY_DEPENDENCY_DIR", "tools/thirdparty_dependencies") arrow_versioned <- paste0("arrow-", VERSION) -# configure.win uses a different libarrow dir and and the zip is already nested +# configure.win uses a different libarrow dir and the zip is already nested if (on_windows) { lib_dir <- "windows" dst_dir <- lib_dir diff --git a/r/tools/update-checksums.R b/r/tools/update-checksums.R index 8b9f1e6959cfd..f41652e87849e 100644 --- a/r/tools/update-checksums.R +++ b/r/tools/update-checksums.R @@ -49,7 +49,7 @@ binary_paths <- readLines(tasks_yml) |> artifactory_root <- "https://apache.jfrog.io/artifactory/arrow/r/%s/libarrow/bin/%s" -# Get the checksuym file from the artifactory +# Get the checksum file from the artifactory for (path in binary_paths) { sha_path <- paste0(path, ".sha512") file <- file.path("tools/checksums", sha_path) diff --git a/r/vignettes/arrow.Rmd b/r/vignettes/arrow.Rmd index c218b08ede77b..50329334ce8b0 100644 --- a/r/vignettes/arrow.Rmd +++ b/r/vignettes/arrow.Rmd @@ -66,7 +66,7 @@ as.data.frame(dat) When this coercion takes place, each of the columns in the original Arrow Table must be converted to native R data objects. In the `dat` Table, for instance, `dat$x` is stored as the Arrow data type int32 inherited from C++, which becomes an R integer type when `as.data.frame()` is called. -It is possible to exercise fine grained control over this conversion process. To learn more about the different types and how they are converted, see the [data types](./data_types.html) article. +It is possible to exercise fine-grained control over this conversion process. To learn more about the different types and how they are converted, see the [data types](./data_types.html) article. ## Reading and writing data diff --git a/r/vignettes/data_objects.Rmd b/r/vignettes/data_objects.Rmd index 7fcef8e6e78c6..065745182df04 100644 --- a/r/vignettes/data_objects.Rmd +++ b/r/vignettes/data_objects.Rmd @@ -259,7 +259,7 @@ write_parquet(df_b, file.path(ds_dir_b, "part-0.parquet")) write_parquet(df_c, file.path(ds_dir_c, "part-0.parquet")) ``` -If we had wanted to, we could have further subdivided the dataset. A folder could contain multiple files (`part-0.parquet`, `part-1.parquet`, etc) if we wanted it to. Similarly, there is no particular reason to name the files `part-0.parquet` this way at all: it would have been fine to call these files `subset-a.parquet`, `subset-b.parquet`, and `subset-c.parquet` if we had wished. We could have written other file formats if we wanted, and we don't necessarily have to use Hive-style folders. You can learn more about the supported formats by reading the help documentation for `open_dataset()`, and learn about how to exercise fine grained control with `help("Dataset", package = "arrow")`. +If we had wanted to, we could have further subdivided the dataset. A folder could contain multiple files (`part-0.parquet`, `part-1.parquet`, etc) if we wanted it to. Similarly, there is no particular reason to name the files `part-0.parquet` this way at all: it would have been fine to call these files `subset-a.parquet`, `subset-b.parquet`, and `subset-c.parquet` if we had wished. 
We could have written other file formats if we wanted, and we don't necessarily have to use Hive-style folders. You can learn more about the supported formats by reading the help documentation for `open_dataset()`, and learn about how to exercise fine-grained control with `help("Dataset", package = "arrow")`. In any case, we have created an on-disk parquet Dataset using Hive-style partitioning. Our Dataset is defined by these files: diff --git a/r/vignettes/data_types.Rmd b/r/vignettes/data_types.Rmd index 6cbe7c72e6809..4b5ee01b6ab83 100644 --- a/r/vignettes/data_types.Rmd +++ b/r/vignettes/data_types.Rmd @@ -34,7 +34,7 @@ When the arrow package converts between R data and Arrow data, it will first che knitr::include_graphics("./data_types.png") ``` -In this image, black boxes refer to R data types and light blue boxes refer to Arrow data types. Directional arrows specify conversions (e.g., the bidirectional arrow between the logical R type and the boolean Arrow type means that R logicals convert to Arrow booleans and vice versa). Solid lines indicate that the this conversion rule is always the default; dashed lines mean that it only sometimes applies (the rules and special cases are described below). +In this image, black boxes refer to R data types and light blue boxes refer to Arrow data types. Directional arrows specify conversions (e.g., the bidirectional arrow between the logical R type and the boolean Arrow type means that the logical R converts to an Arrow boolean and vice versa). Solid lines indicate that this conversion rule is always the default; dashed lines mean that it only sometimes applies (the rules and special cases are described below). ## Logical/boolean types diff --git a/r/vignettes/data_wrangling.Rmd b/r/vignettes/data_wrangling.Rmd index e3d5b306f3e71..305a91c156eb1 100644 --- a/r/vignettes/data_wrangling.Rmd +++ b/r/vignettes/data_wrangling.Rmd @@ -165,7 +165,7 @@ sw2 %>% transmute(name, height, mass, res = residuals(lm(mass ~ height))) ``` -Because window functions are not supported, computing an aggregation like `mean()` on a grouped table or within a rowwise opertation like `filter()` is not supported: +Because window functions are not supported, computing an aggregation like `mean()` on a grouped table or within a rowwise operation like `filter()` is not supported: ```{r} sw %>% diff --git a/r/vignettes/developers/setup.Rmd b/r/vignettes/developers/setup.Rmd index de33e72407792..8e7cff7410473 100644 --- a/r/vignettes/developers/setup.Rmd +++ b/r/vignettes/developers/setup.Rmd @@ -46,18 +46,18 @@ not possible to link to a system version of libarrow during development). ## Option 1: Using nightly libarrow binaries -On Linux, MacOS, and Windows you can use the same workflow you might use for another +On Linux, macOS, and Windows you can use the same workflow you might use for another package that contains compiled code (e.g., `R CMD INSTALL .` from a terminal, `devtools::load_all()` from an R prompt, or `Install & Restart` from RStudio). If the `arrow/r/libarrow` directory is not populated, the configure script will attempt to download the latest nightly libarrow binary, extract it to the -`arrow/r/libarrow` directory (MacOS, Linux) or `arrow/r/windows` +`arrow/r/libarrow` directory (macOS, Linux) or `arrow/r/windows` directory (Windows), and continue building the R package as usual. 
Most of the time, you won't need to update your version of libarrow because the R package rarely changes with updates to the C++ library; however, if you start to get errors when rebuilding the R package, you may have to remove the -`libarrow` directory (MacOS, Linux) or `windows` directory (Windows) +`libarrow` directory (macOS, Linux) or `windows` directory (Windows) and do a "clean" rebuild. You can do this from a terminal with `R CMD INSTALL . --preclean`, from RStudio using the "Clean and Install" option from "Build" tab, or using `make clean` if you are using the `Makefile` diff --git a/r/vignettes/fs.Rmd b/r/vignettes/fs.Rmd index a21a7864f7d73..50278af25bd1b 100644 --- a/r/vignettes/fs.Rmd +++ b/r/vignettes/fs.Rmd @@ -14,7 +14,7 @@ This article provides an overview of working with both S3 and GCS data using the ## S3 and GCS support on Linux -Before you start, make sure that your arrow install has support for S3 and/or GCS enabled. For most users this will be true by default, because the Windows and MacOS binary packages hosted on CRAN include S3 and GCS support. You can check whether support is enabled via helper functions: +Before you start, make sure that your arrow install has support for S3 and/or GCS enabled. For most users this will be true by default, because the Windows and macOS binary packages hosted on CRAN include S3 and GCS support. You can check whether support is enabled via helper functions: ```r arrow_with_s3() @@ -307,7 +307,7 @@ Sys.unsetenv("AWS_S3_ENDPOINT") ``` By default, the AWS SDK tries to retrieve metadata about user configuration, -which can cause conficts when passing in connection details via URI (for example +which can cause conflicts when passing in connection details via URI (for example when accessing a MINIO bucket). To disable the use of AWS environment variables, you can set environment variable `AWS_EC2_METADATA_DISABLED` to `TRUE`. diff --git a/r/vignettes/install.Rmd b/r/vignettes/install.Rmd index 10155e3a8cd5b..df43a9de36fc2 100644 --- a/r/vignettes/install.Rmd +++ b/r/vignettes/install.Rmd @@ -10,9 +10,9 @@ In most cases, `install.packages("arrow")` should just work. There are things yo ## Background -The Apache Arrow project is implemented in multiple languages, and the R package depends on the Arrow C++ library (referred to from here on as libarrow). This means that when you install arrow, you need both the R and C++ versions. If you install arrow from CRAN on a machine running Windows or MacOS, when you call `install.packages("arrow")`, a precompiled binary containing both the R package and libarrow will be downloaded. However, CRAN does not host R package binaries for Linux, and so you must choose from one of the alternative approaches. +The Apache Arrow project is implemented in multiple languages, and the R package depends on the Arrow C++ library (referred to from here on as libarrow). This means that when you install arrow, you need both the R and C++ versions. If you install arrow from CRAN on a machine running Windows or macOS, when you call `install.packages("arrow")`, a precompiled binary containing both the R package and libarrow will be downloaded. However, CRAN does not host R package binaries for Linux, and so you must choose from one of the alternative approaches. -This article outlines the recommend approaches to installing arrow on Linux, starting from the simplest and least customizable to the most complex but with more flexbility to customize your installation. 
+This article outlines the recommend approaches to installing arrow on Linux, starting from the simplest and least customizable to the most complex but with more flexibility to customize your installation. The primary audience for this document is arrow R package _users_ on Linux, and not Arrow _developers_. Additional resources for developers are listed at the end of this article. @@ -225,7 +225,7 @@ already present (when set to `AUTO`, the default). These dependencies vary by platform; however, if you wish to install these yourself prior to libarrow installation, we recommend that you take a look at the [docker file for whichever of our CI builds](https://github.com/apache/arrow/tree/main/ci/docker) -(the ones ending in "cpp" are for building Arrow's C++ libaries, aka libarrow) +(the ones ending in "cpp" are for building Arrow's C++ libraries, aka libarrow) corresponds most closely to your setup. This will contain the most up-to-date information about dependencies and minimum versions. diff --git a/r/vignettes/read_write.Rmd b/r/vignettes/read_write.Rmd index 15b2392b8ee5c..0ee695a6f4907 100644 --- a/r/vignettes/read_write.Rmd +++ b/r/vignettes/read_write.Rmd @@ -140,7 +140,7 @@ write_csv_arrow(mtcars, file_path) read_csv_arrow(file_path, col_select = starts_with("d")) ``` -In addition to the options provided by the readr-style arguments (`delim`, `quote`, `escape_doubple`, `escape_backslash`, etc), you can use the `schema` argument to specify column types: see `schema()` help for details. There is also the option of using `parse_options`, `convert_options`, and `read_options` to exercise fine-grained control over the arrow csv reader: see `help("CsvReadOptions", package = "arrow")` for details. +In addition to the options provided by the readr-style arguments (`delim`, `quote`, `escape_double`, `escape_backslash`, etc), you can use the `schema` argument to specify column types: see `schema()` help for details. There is also the option of using `parse_options`, `convert_options`, and `read_options` to exercise fine-grained control over the arrow csv reader: see `help("CsvReadOptions", package = "arrow")` for details. ## JSON format From 081b4022fe6f659d8765efc82b3f4787c5039e3c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 7 Dec 2023 13:10:52 -0500 Subject: [PATCH 004/570] MINOR: [Java] Bump ch.qos.logback:logback-classic from 1.2.3 to 1.2.13 in /java (#39085) Bumps [ch.qos.logback:logback-classic](https://github.com/qos-ch/logback) from 1.2.3 to 1.2.13.
Commits
  • 2648b9e prepare release 1.2.13
  • bb09515 fix CVE-2023-6378
  • 4573294 start work on 1.2.13-SNAPSHOT
  • a388193 Merge branch 'branch_1.2.x' of github.com:qos-ch/logback into branch_1.2.x
  • de44dc4 prepare release 1.2.12
  • ca0cf17 Merge pull request #532 from joakime/fix-jetty-requestlog
  • e31609b removed unused files
  • 21e29ef Merge pull request #567 from spliffone/LOGBACK-1633
  • e869000 fix: published POM file contain the wrong scm URL
  • 009ea46 version for next dev cycle

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/pom.xml | 2 +- java/tools/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/java/pom.xml b/java/pom.xml index 4d561dba87a2d..cd16b862d10c0 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -698,7 +698,7 @@ ch.qos.logback logback-classic - 1.2.3 + 1.2.13 test diff --git a/java/tools/pom.xml b/java/tools/pom.xml index 4b43c513efd36..1815c39227de9 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -47,7 +47,7 @@ ch.qos.logback logback-classic - 1.2.3 + 1.2.13 runtime From 476c78fd6e535faacfc6a171529ef496abb30cd9 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Fri, 8 Dec 2023 15:48:06 -0300 Subject: [PATCH 005/570] GH-38597: [C++] Implement GetFileInfo(selector) for Azure filesystem (#39009) ### Rationale for this change Part of Azure FS implementation. ### What changes are included in this PR? The version of `GetFileInfo` that takes a prefix and can optionally recurse into directories. ### Are these changes tested? By unit tests present in this PR. Separate from this PR, I'm thinking of way to fuzz-test the FS API. * Closes: #38597 --- cpp/src/arrow/filesystem/azurefs.cc | 212 ++++++++++++++++++- cpp/src/arrow/filesystem/azurefs.h | 2 +- cpp/src/arrow/filesystem/azurefs_test.cc | 248 ++++++++++++++++++++++- cpp/src/arrow/filesystem/filesystem.cc | 3 +- cpp/src/arrow/filesystem/path_util.cc | 31 ++- cpp/src/arrow/filesystem/path_util.h | 12 +- cpp/src/arrow/filesystem/test_util.cc | 6 + cpp/src/arrow/filesystem/test_util.h | 4 + 8 files changed, 481 insertions(+), 37 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 9bd2b0ae9d8a0..daababb04c172 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -39,7 +39,7 @@ namespace fs { // ----------------------------------------------------------------------- // AzureOptions Implementation -AzureOptions::AzureOptions() {} +AzureOptions::AzureOptions() = default; bool AzureOptions::Equals(const AzureOptions& other) const { return (account_dfs_url == other.account_dfs_url && @@ -820,6 +820,209 @@ class AzureFileSystem::Impl { } } + private: + template + Status VisitContainers(const Azure::Core::Context& context, + OnContainer&& on_container) const { + Azure::Storage::Blobs::ListBlobContainersOptions options; + try { + auto container_list_response = + blob_service_client_->ListBlobContainers(options, context); + for (; container_list_response.HasPage(); + container_list_response.MoveToNextPage(context)) { + for (const auto& container : container_list_response.BlobContainers) { + RETURN_NOT_OK(on_container(container)); + } + } + } catch (const Azure::Storage::StorageException& exception) { + return internal::ExceptionToStatus("Failed to list account containers.", exception); + } + return Status::OK(); + } + + static FileInfo FileInfoFromBlob(const std::string& container, + const Azure::Storage::Blobs::Models::BlobItem& blob) { + auto path = internal::ConcatAbstractPath(container, blob.Name); + if (internal::HasTrailingSlash(blob.Name)) { + return DirectoryFileInfoFromPath(path); + } + FileInfo info{std::move(path), FileType::File}; + info.set_size(blob.BlobSize); + info.set_mtime(std::chrono::system_clock::time_point{blob.Details.LastModified}); + return info; + } + + static FileInfo DirectoryFileInfoFromPath(const std::string& path) { + return FileInfo{std::string{internal::RemoveTrailingSlash(path)}, 
+ FileType::Directory}; + } + + static std::string_view BasenameView(std::string_view s) { + DCHECK(!internal::HasTrailingSlash(s)); + auto offset = s.find_last_of(internal::kSep); + auto result = (offset == std::string_view::npos) ? s : s.substr(offset); + DCHECK(!result.empty() && result.back() != internal::kSep); + return result; + } + + /// \brief List the blobs at the root of a container or some dir in a container. + /// + /// \pre container_client is the client for the container named like the first + /// segment of select.base_dir. + Status GetFileInfoWithSelectorFromContainer( + const Azure::Storage::Blobs::BlobContainerClient& container_client, + const Azure::Core::Context& context, Azure::Nullable page_size_hint, + const FileSelector& select, FileInfoVector* acc_results) { + ARROW_ASSIGN_OR_RAISE(auto base_location, AzureLocation::FromString(select.base_dir)); + + bool found = false; + Azure::Storage::Blobs::ListBlobsOptions options; + if (internal::IsEmptyPath(base_location.path)) { + // If the base_dir is the root of the container, then we want to list all blobs in + // the container and the Prefix should be empty and not even include the trailing + // slash because the container itself represents the `/` directory. + options.Prefix = {}; + found = true; // Unless the container itself is not found later! + } else { + options.Prefix = internal::EnsureTrailingSlash(base_location.path); + } + options.PageSizeHint = page_size_hint; + options.Include = Azure::Storage::Blobs::Models::ListBlobsIncludeFlags::Metadata; + + auto recurse = [&](const std::string& blob_prefix) noexcept -> Status { + if (select.recursive && select.max_recursion > 0) { + FileSelector sub_select; + sub_select.base_dir = internal::ConcatAbstractPath( + base_location.container, internal::RemoveTrailingSlash(blob_prefix)); + sub_select.allow_not_found = true; + sub_select.recursive = true; + sub_select.max_recursion = select.max_recursion - 1; + return GetFileInfoWithSelectorFromContainer( + container_client, context, page_size_hint, sub_select, acc_results); + } + return Status::OK(); + }; + + auto process_blob = + [&](const Azure::Storage::Blobs::Models::BlobItem& blob) noexcept { + // blob.Name has trailing slash only when Prefix is an empty + // directory marker blob for the directory we're listing + // from, and we should skip it. + if (!internal::HasTrailingSlash(blob.Name)) { + acc_results->push_back(FileInfoFromBlob(base_location.container, blob)); + } + }; + auto process_prefix = [&](const std::string& prefix) noexcept -> Status { + const auto path = internal::ConcatAbstractPath(base_location.container, prefix); + acc_results->push_back(DirectoryFileInfoFromPath(path)); + return recurse(prefix); + }; + + try { + auto list_response = + container_client.ListBlobsByHierarchy(/*delimiter=*/"/", options, context); + for (; list_response.HasPage(); list_response.MoveToNextPage(context)) { + if (list_response.Blobs.empty() && list_response.BlobPrefixes.empty()) { + continue; + } + found = true; + // Blob and BlobPrefixes are sorted by name, so we can merge-iterate + // them to ensure returned results are all sorted. 
+ size_t blob_index = 0; + size_t blob_prefix_index = 0; + while (blob_index < list_response.Blobs.size() && + blob_prefix_index < list_response.BlobPrefixes.size()) { + const auto& blob = list_response.Blobs[blob_index]; + const auto& prefix = list_response.BlobPrefixes[blob_prefix_index]; + const int cmp = blob.Name.compare(prefix); + if (cmp < 0) { + process_blob(blob); + blob_index += 1; + } else if (cmp > 0) { + RETURN_NOT_OK(process_prefix(prefix)); + blob_prefix_index += 1; + } else { + DCHECK_EQ(blob.Name, prefix); + RETURN_NOT_OK(process_prefix(prefix)); + blob_index += 1; + blob_prefix_index += 1; + // If the container has an empty dir marker blob and another blob starting + // with this blob name as a prefix, the blob doesn't appear in the listing + // that also contains the prefix, so AFAICT this branch in unreachable. The + // code above is kept just in case, but if this DCHECK(false) is ever reached, + // we should refactor this loop to ensure no duplicate entries are ever + // reported. + DCHECK(false) + << "Unexpected blob/prefix name collision on the same listing request"; + } + } + for (; blob_index < list_response.Blobs.size(); blob_index++) { + process_blob(list_response.Blobs[blob_index]); + } + for (; blob_prefix_index < list_response.BlobPrefixes.size(); + blob_prefix_index++) { + RETURN_NOT_OK(process_prefix(list_response.BlobPrefixes[blob_prefix_index])); + } + } + } catch (const Azure::Storage::StorageException& exception) { + if (exception.ErrorCode == "ContainerNotFound") { + found = false; + } else { + return internal::ExceptionToStatus( + "Failed to list blobs in a directory: " + select.base_dir + ": " + + container_client.GetUrl(), + exception); + } + } + + return found || select.allow_not_found + ? Status::OK() + : ::arrow::fs::internal::PathNotFound(select.base_dir); + } + + public: + Status GetFileInfoWithSelector(const Azure::Core::Context& context, + Azure::Nullable page_size_hint, + const FileSelector& select, + FileInfoVector* acc_results) { + ARROW_ASSIGN_OR_RAISE(auto base_location, AzureLocation::FromString(select.base_dir)); + + if (base_location.container.empty()) { + // Without a container, the base_location is equivalent to the filesystem + // root -- `/`. FileSelector::allow_not_found doesn't matter in this case + // because the root always exists. + auto on_container = + [&](const Azure::Storage::Blobs::Models::BlobContainerItem& container) { + // Deleted containers are not listed by ListContainers. + DCHECK(!container.IsDeleted); + + // Every container is considered a directory. + FileInfo info{container.Name, FileType::Directory}; + info.set_mtime( + std::chrono::system_clock::time_point{container.Details.LastModified}); + acc_results->push_back(std::move(info)); + + // Recurse into containers (subdirectories) if requested. 
+ if (select.recursive && select.max_recursion > 0) { + FileSelector sub_select; + sub_select.base_dir = container.Name; + sub_select.allow_not_found = true; + sub_select.recursive = true; + sub_select.max_recursion = select.max_recursion - 1; + ARROW_RETURN_NOT_OK(GetFileInfoWithSelector(context, page_size_hint, + sub_select, acc_results)); + } + return Status::OK(); + }; + return VisitContainers(context, std::move(on_container)); + } + + auto container_client = + blob_service_client_->GetBlobContainerClient(base_location.container); + return GetFileInfoWithSelectorFromContainer(container_client, context, page_size_hint, + select, acc_results); + } + Result> OpenInputFile(const AzureLocation& location, AzureFileSystem* fs) { RETURN_NOT_OK(ValidateFileLocation(location)); @@ -1196,7 +1399,12 @@ Result AzureFileSystem::GetFileInfo(const std::string& path) { } Result AzureFileSystem::GetFileInfo(const FileSelector& select) { - return Status::NotImplemented("The Azure FileSystem is not fully implemented"); + Azure::Core::Context context; + Azure::Nullable page_size_hint; // unspecified + FileInfoVector results; + RETURN_NOT_OK( + impl_->GetFileInfoWithSelector(context, page_size_hint, select, &results)); + return {std::move(results)}; } Status AzureFileSystem::CreateDir(const std::string& path, bool recursive) { diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 9f980ee8baae0..b2865b059ef6e 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -157,7 +157,7 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem { const AzureOptions& options, const io::IOContext& = io::default_io_context()); private: - explicit AzureFileSystem(const AzureOptions& options, const io::IOContext& io_context); + AzureFileSystem(const AzureOptions& options, const io::IOContext& io_context); class Impl; std::unique_ptr impl_; diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 41f1663114f45..792c63b209402 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -70,6 +70,9 @@ using ::testing::IsEmpty; using ::testing::Not; using ::testing::NotNull; +namespace Blobs = Azure::Storage::Blobs; +namespace Files = Azure::Storage::Files; + auto const* kLoremIpsum = R"""( Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis @@ -193,9 +196,8 @@ TEST(AzureFileSystem, OptionsCompare) { class AzureFileSystemTest : public ::testing::Test { public: std::shared_ptr fs_; - std::unique_ptr blob_service_client_; - std::unique_ptr - datalake_service_client_; + std::unique_ptr blob_service_client_; + std::unique_ptr datalake_service_client_; AzureOptions options_; std::mt19937_64 generator_; std::string container_name_; @@ -213,15 +215,14 @@ class AzureFileSystemTest : public ::testing::Test { suite_skipped_ = true; GTEST_SKIP() << options.status().message(); } - container_name_ = RandomChars(32); - blob_service_client_ = std::make_unique( + // Stop-gap solution before GH-39119 is fixed. 
+ container_name_ = "z" + RandomChars(31); + blob_service_client_ = std::make_unique( options_.account_blob_url, options_.storage_credentials_provider); - datalake_service_client_ = - std::make_unique( - options_.account_dfs_url, options_.storage_credentials_provider); + datalake_service_client_ = std::make_unique( + options_.account_dfs_url, options_.storage_credentials_provider); ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_)); - auto container_client = blob_service_client_->GetBlobContainerClient(container_name_); - container_client.CreateIfNotExists(); + auto container_client = CreateContainer(container_name_); auto blob_client = container_client.GetBlockBlobClient(PreexistingObjectName()); blob_client.UploadFrom(reinterpret_cast(kLoremIpsum), @@ -239,6 +240,20 @@ class AzureFileSystemTest : public ::testing::Test { } } + Blobs::BlobContainerClient CreateContainer(const std::string& name) { + auto container_client = blob_service_client_->GetBlobContainerClient(name); + (void)container_client.CreateIfNotExists(); + return container_client; + } + + Blobs::BlobClient CreateBlob(Blobs::BlobContainerClient& container_client, + const std::string& name, const std::string& data = "") { + auto blob_client = container_client.GetBlockBlobClient(name); + (void)blob_client.UploadFrom(reinterpret_cast(data.data()), + data.size()); + return blob_client; + } + std::string PreexistingContainerName() const { return container_name_; } std::string PreexistingContainerPath() const { @@ -326,6 +341,45 @@ class AzureFileSystemTest : public ::testing::Test { top_blob_path, }; } + + char const* kSubData = "sub data"; + char const* kSomeData = "some data"; + char const* kOtherData = "other data"; + + void SetUpSmallFileSystemTree() { + // Set up test containers + CreateContainer("empty-container"); + auto container = CreateContainer("container"); + + CreateBlob(container, "emptydir/"); + CreateBlob(container, "somedir/subdir/subfile", kSubData); + CreateBlob(container, "somefile", kSomeData); + // Add an explicit marker for a non-empty directory. + CreateBlob(container, "otherdir/1/2/"); + // otherdir/{1/,2/,3/} are implicitly assumed to exist because of + // the otherdir/1/2/3/otherfile blob. 
+ CreateBlob(container, "otherdir/1/2/3/otherfile", kOtherData); + } + + void AssertInfoAllContainersRecursive(const std::vector& infos) { + ASSERT_EQ(infos.size(), 14); + AssertFileInfo(infos[0], "container", FileType::Directory); + AssertFileInfo(infos[1], "container/emptydir", FileType::Directory); + AssertFileInfo(infos[2], "container/otherdir", FileType::Directory); + AssertFileInfo(infos[3], "container/otherdir/1", FileType::Directory); + AssertFileInfo(infos[4], "container/otherdir/1/2", FileType::Directory); + AssertFileInfo(infos[5], "container/otherdir/1/2/3", FileType::Directory); + AssertFileInfo(infos[6], "container/otherdir/1/2/3/otherfile", FileType::File, + strlen(kOtherData)); + AssertFileInfo(infos[7], "container/somedir", FileType::Directory); + AssertFileInfo(infos[8], "container/somedir/subdir", FileType::Directory); + AssertFileInfo(infos[9], "container/somedir/subdir/subfile", FileType::File, + strlen(kSubData)); + AssertFileInfo(infos[10], "container/somefile", FileType::File, strlen(kSomeData)); + AssertFileInfo(infos[11], "empty-container", FileType::Directory); + AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory); + AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File); + } }; class AzuriteFileSystemTest : public AzureFileSystemTest { @@ -518,6 +572,180 @@ TEST_F(AzureHierarchicalNamespaceFileSystemTest, GetFileInfoObject) { RunGetFileInfoObjectTest(); } +TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) { + SetUpSmallFileSystemTree(); + + FileSelector select; + std::vector infos; + + // Root dir + select.base_dir = ""; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos.size(), 3); + ASSERT_EQ(infos, SortedInfos(infos)); + AssertFileInfo(infos[0], "container", FileType::Directory); + AssertFileInfo(infos[1], "empty-container", FileType::Directory); + AssertFileInfo(infos[2], container_name_, FileType::Directory); + + // Empty container + select.base_dir = "empty-container"; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos.size(), 0); + // Nonexistent container + select.base_dir = "nonexistent-container"; + ASSERT_RAISES(IOError, fs_->GetFileInfo(select)); + select.allow_not_found = true; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos.size(), 0); + select.allow_not_found = false; + // Non-empty container + select.base_dir = "container"; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos, SortedInfos(infos)); + ASSERT_EQ(infos.size(), 4); + AssertFileInfo(infos[0], "container/emptydir", FileType::Directory); + AssertFileInfo(infos[1], "container/otherdir", FileType::Directory); + AssertFileInfo(infos[2], "container/somedir", FileType::Directory); + AssertFileInfo(infos[3], "container/somefile", FileType::File, 9); + + // Empty "directory" + select.base_dir = "container/emptydir"; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos.size(), 0); + // Non-empty "directories" + select.base_dir = "container/somedir"; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos.size(), 1); + AssertFileInfo(infos[0], "container/somedir/subdir", FileType::Directory); + select.base_dir = "container/somedir/subdir"; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos.size(), 1); + AssertFileInfo(infos[0], "container/somedir/subdir/subfile", FileType::File, 8); + // Nonexistent + select.base_dir = "container/nonexistent"; + ASSERT_RAISES(IOError, fs_->GetFileInfo(select)); + 
select.allow_not_found = true; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos.size(), 0); + select.allow_not_found = false; + + // Trailing slashes + select.base_dir = "empty-container/"; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos.size(), 0); + select.base_dir = "nonexistent-container/"; + ASSERT_RAISES(IOError, fs_->GetFileInfo(select)); + select.base_dir = "container/"; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos, SortedInfos(infos)); + ASSERT_EQ(infos.size(), 4); +} + +TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorRecursive) { + SetUpSmallFileSystemTree(); + + FileSelector select; + select.recursive = true; + + std::vector infos; + // Root dir + select.base_dir = ""; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos.size(), 14); + ASSERT_EQ(infos, SortedInfos(infos)); + AssertInfoAllContainersRecursive(infos); + + // Empty container + select.base_dir = "empty-container"; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos.size(), 0); + + // Non-empty container + select.base_dir = "container"; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos, SortedInfos(infos)); + ASSERT_EQ(infos.size(), 10); + AssertFileInfo(infos[0], "container/emptydir", FileType::Directory); + AssertFileInfo(infos[1], "container/otherdir", FileType::Directory); + AssertFileInfo(infos[2], "container/otherdir/1", FileType::Directory); + AssertFileInfo(infos[3], "container/otherdir/1/2", FileType::Directory); + AssertFileInfo(infos[4], "container/otherdir/1/2/3", FileType::Directory); + AssertFileInfo(infos[5], "container/otherdir/1/2/3/otherfile", FileType::File, 10); + AssertFileInfo(infos[6], "container/somedir", FileType::Directory); + AssertFileInfo(infos[7], "container/somedir/subdir", FileType::Directory); + AssertFileInfo(infos[8], "container/somedir/subdir/subfile", FileType::File, 8); + AssertFileInfo(infos[9], "container/somefile", FileType::File, 9); + + // Empty "directory" + select.base_dir = "container/emptydir"; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos.size(), 0); + + // Non-empty "directories" + select.base_dir = "container/somedir"; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos, SortedInfos(infos)); + ASSERT_EQ(infos.size(), 2); + AssertFileInfo(infos[0], "container/somedir/subdir", FileType::Directory); + AssertFileInfo(infos[1], "container/somedir/subdir/subfile", FileType::File, 8); + + select.base_dir = "container/otherdir"; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos, SortedInfos(infos)); + ASSERT_EQ(infos.size(), 4); + AssertFileInfo(infos[0], "container/otherdir/1", FileType::Directory); + AssertFileInfo(infos[1], "container/otherdir/1/2", FileType::Directory); + AssertFileInfo(infos[2], "container/otherdir/1/2/3", FileType::Directory); + AssertFileInfo(infos[3], "container/otherdir/1/2/3/otherfile", FileType::File, 10); +} + +TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorExplicitImplicitDirDedup) { + { + auto container = CreateContainer("container"); + CreateBlob(container, "mydir/emptydir1/"); + CreateBlob(container, "mydir/emptydir2/"); + CreateBlob(container, "mydir/nonemptydir1/"); // explicit dir marker + CreateBlob(container, "mydir/nonemptydir1/somefile", kSomeData); + CreateBlob(container, "mydir/nonemptydir2/somefile", kSomeData); + } + std::vector infos; + + FileSelector select; // non-recursive + select.base_dir = 
"container"; + + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos.size(), 1); + ASSERT_EQ(infos, SortedInfos(infos)); + AssertFileInfo(infos[0], "container/mydir", FileType::Directory); + + select.base_dir = "container/mydir"; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos.size(), 4); + ASSERT_EQ(infos, SortedInfos(infos)); + AssertFileInfo(infos[0], "container/mydir/emptydir1", FileType::Directory); + AssertFileInfo(infos[1], "container/mydir/emptydir2", FileType::Directory); + AssertFileInfo(infos[2], "container/mydir/nonemptydir1", FileType::Directory); + AssertFileInfo(infos[3], "container/mydir/nonemptydir2", FileType::Directory); + + select.base_dir = "container/mydir/emptydir1"; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos.size(), 0); + + select.base_dir = "container/mydir/emptydir2"; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos.size(), 0); + + select.base_dir = "container/mydir/nonemptydir1"; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos.size(), 1); + AssertFileInfo(infos[0], "container/mydir/nonemptydir1/somefile", FileType::File); + + select.base_dir = "container/mydir/nonemptydir2"; + ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_EQ(infos.size(), 1); + AssertFileInfo(infos[0], "container/mydir/nonemptydir2/somefile", FileType::File); +} + TEST_F(AzuriteFileSystemTest, CreateDirFailureNoContainer) { ASSERT_RAISES(Invalid, fs_->CreateDir("", false)); } diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index 9ecc4610f3864..810e9c179b156 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -654,8 +654,7 @@ Status CopyFiles(const std::shared_ptr& source_fs, "', which is outside base dir '", source_sel.base_dir, "'"); } - auto destination_path = - internal::ConcatAbstractPath(destination_base_dir, std::string(*relative)); + auto destination_path = internal::ConcatAbstractPath(destination_base_dir, *relative); if (source_info.IsDirectory()) { dirs.push_back(destination_path); diff --git a/cpp/src/arrow/filesystem/path_util.cc b/cpp/src/arrow/filesystem/path_util.cc index 46ea436a9f31a..9c895ae76c7b8 100644 --- a/cpp/src/arrow/filesystem/path_util.cc +++ b/cpp/src/arrow/filesystem/path_util.cc @@ -52,7 +52,7 @@ std::vector SplitAbstractPath(const std::string& path, char sep) { } auto append_part = [&parts, &v](size_t start, size_t end) { - parts.push_back(std::string(v.substr(start, end - start))); + parts.emplace_back(v.substr(start, end - start)); }; size_t start = 0; @@ -72,15 +72,12 @@ std::string SliceAbstractPath(const std::string& s, int offset, int length, char return ""; } std::vector components = SplitAbstractPath(s, sep); - std::stringstream combined; if (offset >= static_cast(components.size())) { return ""; } - int end = offset + length; - if (end > static_cast(components.size())) { - end = static_cast(components.size()); - } - for (int i = offset; i < end; i++) { + const auto end = std::min(static_cast(offset) + length, components.size()); + std::stringstream combined; + for (auto i = static_cast(offset); i < end; i++) { combined << components[i]; if (i < end - 1) { combined << sep; @@ -140,16 +137,20 @@ Status ValidateAbstractPathParts(const std::vector& parts) { return Status::OK(); } -std::string ConcatAbstractPath(const std::string& base, const std::string& stem) { +std::string ConcatAbstractPath(std::string_view base, 
std::string_view stem) { DCHECK(!stem.empty()); if (base.empty()) { - return stem; + return std::string{stem}; } - return EnsureTrailingSlash(base) + std::string(RemoveLeadingSlash(stem)); + std::string result; + result.reserve(base.length() + stem.length() + 1); // extra 1 is for potential kSep + result += EnsureTrailingSlash(base); + result += RemoveLeadingSlash(stem); + return result; } std::string EnsureTrailingSlash(std::string_view v) { - if (v.length() > 0 && v.back() != kSep) { + if (!v.empty() && !HasTrailingSlash(v)) { // XXX How about "C:" on Windows? We probably don't want to turn it into "C:/"... // Unless the local filesystem always uses absolute paths return std::string(v) + kSep; @@ -159,7 +160,7 @@ std::string EnsureTrailingSlash(std::string_view v) { } std::string EnsureLeadingSlash(std::string_view v) { - if (v.length() == 0 || v.front() != kSep) { + if (!HasLeadingSlash(v)) { // XXX How about "C:" on Windows? We probably don't want to turn it into "/C:"... return kSep + std::string(v); } else { @@ -197,10 +198,6 @@ Status AssertNoTrailingSlash(std::string_view key) { return Status::OK(); } -bool HasTrailingSlash(std::string_view key) { return key.back() == '/'; } - -bool HasLeadingSlash(std::string_view key) { return key.front() == '/'; } - Result MakeAbstractPathRelative(const std::string& base, const std::string& path) { if (base.empty() || base.front() != kSep) { @@ -383,7 +380,7 @@ struct Globber::Impl { Globber::Globber(std::string pattern) : impl_(new Impl(pattern)) {} -Globber::~Globber() {} +Globber::~Globber() = default; bool Globber::Matches(const std::string& path) { return regex_match(path, impl_->pattern_); diff --git a/cpp/src/arrow/filesystem/path_util.h b/cpp/src/arrow/filesystem/path_util.h index 2c8c123e779f4..1da7afd3f9381 100644 --- a/cpp/src/arrow/filesystem/path_util.h +++ b/cpp/src/arrow/filesystem/path_util.h @@ -69,7 +69,7 @@ Status ValidateAbstractPathParts(const std::vector& parts); // Append a non-empty stem to an abstract path. ARROW_EXPORT -std::string ConcatAbstractPath(const std::string& base, const std::string& stem); +std::string ConcatAbstractPath(std::string_view base, std::string_view stem); // Make path relative to base, if it starts with base. Otherwise error out. 
ARROW_EXPORT @@ -94,11 +94,13 @@ std::string_view RemoveTrailingSlash(std::string_view s, bool preserve_root = fa ARROW_EXPORT Status AssertNoTrailingSlash(std::string_view s); -ARROW_EXPORT -bool HasTrailingSlash(std::string_view s); +inline bool HasTrailingSlash(std::string_view s) { + return !s.empty() && s.back() == kSep; +} -ARROW_EXPORT -bool HasLeadingSlash(std::string_view s); +inline bool HasLeadingSlash(std::string_view s) { + return !s.empty() && s.front() == kSep; +} ARROW_EXPORT bool IsAncestorOf(std::string_view ancestor, std::string_view descendant); diff --git a/cpp/src/arrow/filesystem/test_util.cc b/cpp/src/arrow/filesystem/test_util.cc index 6c5dda8e659df..040917dcd218a 100644 --- a/cpp/src/arrow/filesystem/test_util.cc +++ b/cpp/src/arrow/filesystem/test_util.cc @@ -126,6 +126,12 @@ void SortInfos(std::vector* infos) { std::sort(infos->begin(), infos->end(), FileInfo::ByPath{}); } +std::vector SortedInfos(const std::vector& infos) { + auto sorted = infos; + SortInfos(&sorted); + return sorted; +} + void CollectFileInfoGenerator(FileInfoGenerator gen, FileInfoVector* out_infos) { auto fut = CollectAsyncGenerator(gen); ASSERT_FINISHES_OK_AND_ASSIGN(auto nested_infos, fut); diff --git a/cpp/src/arrow/filesystem/test_util.h b/cpp/src/arrow/filesystem/test_util.h index c4d846fd31b34..62b488e159a24 100644 --- a/cpp/src/arrow/filesystem/test_util.h +++ b/cpp/src/arrow/filesystem/test_util.h @@ -74,6 +74,10 @@ void CreateFile(FileSystem* fs, const std::string& path, const std::string& data ARROW_TESTING_EXPORT void SortInfos(FileInfoVector* infos); +// Create a copy of a FileInfo vector sorted by lexicographic path order +ARROW_TESTING_EXPORT +FileInfoVector SortedInfos(const FileInfoVector& infos); + ARROW_TESTING_EXPORT void CollectFileInfoGenerator(FileInfoGenerator gen, FileInfoVector* out_infos); From b75755a2c06419abda8859e56f3bcc64f148d681 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Fri, 8 Dec 2023 20:04:03 +0100 Subject: [PATCH 006/570] GH-38479: [C++] Avoid passing null pointer to LZ4 frame decompressor (#39125) ### Rationale for this change Avoid undefined behavior in LZ4 when adding an offset to a null pointer. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * Closes: #38479 --- cpp/src/arrow/io/compressed.cc | 4 +++- cpp/src/arrow/util/compression_lz4.cc | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/io/compressed.cc b/cpp/src/arrow/io/compressed.cc index 72977f0f297f5..6c484242a4fc8 100644 --- a/cpp/src/arrow/io/compressed.cc +++ b/cpp/src/arrow/io/compressed.cc @@ -279,6 +279,8 @@ class CompressedInputStream::Impl { // Decompress some data from the compressed_ buffer. // Call this function only if the decompressed_ buffer is empty. Status DecompressData() { + DCHECK_NE(compressed_->data(), nullptr); + int64_t decompress_size = kDecompressSize; while (true) { @@ -329,7 +331,7 @@ class CompressedInputStream::Impl { // Try to feed more data into the decompressed_ buffer. Status RefillDecompressed(bool* has_data) { // First try to read data from the decompressor - if (compressed_) { + if (compressed_ && compressed_->size() != 0) { if (decompressor_->IsFinished()) { // We just went over the end of a previous compressed stream. 
RETURN_NOT_OK(decompressor_->Reset()); diff --git a/cpp/src/arrow/util/compression_lz4.cc b/cpp/src/arrow/util/compression_lz4.cc index 17e013c13ee0b..be957afab3c46 100644 --- a/cpp/src/arrow/util/compression_lz4.cc +++ b/cpp/src/arrow/util/compression_lz4.cc @@ -109,6 +109,7 @@ class LZ4Decompressor : public Decompressor { auto dst_capacity = static_cast(output_len); size_t ret; + DCHECK_NE(src, nullptr); ret = LZ4F_decompress(ctx_, dst, &dst_capacity, src, &src_size, nullptr /* options */); if (LZ4F_isError(ret)) { From 140ae018f372ee14c9ff19f3e4c2af1b1a579f49 Mon Sep 17 00:00:00 2001 From: Tim Schaub Date: Fri, 8 Dec 2023 20:06:32 +0100 Subject: [PATCH 007/570] GH-38506: [Go][Parquet] Add NumRows and RowGroupNumRows to pqarrow.FileWriter (#38507) ### Rationale for this change When using a chunked column reader to read from one Parquet file and a chunked column writer to write to another Parquet file, it can be useful to keep track of the number of rows written. ### What changes are included in this PR? This branch adds a new `RowGroupNumRows` method to the `pqarrow.FileWriter`. This is somewhat similar to the existing `RowGroupTotalBytesWritten` function. ### Are these changes tested? A new `file_writer_test.go` file is added that adds a test for the new method. ### Are there any user-facing changes? The new method is exported and documented. * Closes: #38506 Authored-by: Tim Schaub Signed-off-by: Matt Topol --- go/parquet/pqarrow/file_writer.go | 17 +++++ go/parquet/pqarrow/file_writer_test.go | 89 ++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 go/parquet/pqarrow/file_writer_test.go diff --git a/go/parquet/pqarrow/file_writer.go b/go/parquet/pqarrow/file_writer.go index bc484ba243f87..1164cd690c399 100644 --- a/go/parquet/pqarrow/file_writer.go +++ b/go/parquet/pqarrow/file_writer.go @@ -134,6 +134,23 @@ func (fw *FileWriter) RowGroupTotalBytesWritten() int64 { return 0 } +// RowGroupNumRows returns the number of rows written to the current row group. +// Returns an error if they are unequal between columns that have been written so far. +func (fw *FileWriter) RowGroupNumRows() (int, error) { + if fw.rgw != nil { + return fw.rgw.NumRows() + } + return 0, nil +} + +// NumRows returns the total number of rows that have been written so far. +func (fw *FileWriter) NumRows() int { + if fw.wr != nil { + return fw.wr.NumRows() + } + return 0 +} + // WriteBuffered will either append to an existing row group or create a new one // based on the record length and max row group length. // diff --git a/go/parquet/pqarrow/file_writer_test.go b/go/parquet/pqarrow/file_writer_test.go new file mode 100644 index 0000000000000..0b76733a62876 --- /dev/null +++ b/go/parquet/pqarrow/file_writer_test.go @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package pqarrow_test + +import ( + "bytes" + "strings" + "testing" + + "github.com/apache/arrow/go/v15/arrow" + "github.com/apache/arrow/go/v15/arrow/array" + "github.com/apache/arrow/go/v15/arrow/memory" + "github.com/apache/arrow/go/v15/parquet" + "github.com/apache/arrow/go/v15/parquet/pqarrow" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestFileWriterRowGroupNumRows(t *testing.T) { + schema := arrow.NewSchema([]arrow.Field{ + {Name: "one", Nullable: true, Type: arrow.PrimitiveTypes.Float64}, + {Name: "two", Nullable: true, Type: arrow.PrimitiveTypes.Float64}, + }, nil) + + data := `[ + {"one": 1, "two": 2}, + {"one": 1, "two": null}, + {"one": null, "two": 2}, + {"one": null, "two": null} + ]` + record, _, err := array.RecordFromJSON(memory.DefaultAllocator, schema, strings.NewReader(data)) + require.NoError(t, err) + + output := &bytes.Buffer{} + writerProps := parquet.NewWriterProperties(parquet.WithMaxRowGroupLength(100)) + writer, err := pqarrow.NewFileWriter(schema, output, writerProps, pqarrow.DefaultWriterProps()) + require.NoError(t, err) + + require.NoError(t, writer.Write(record)) + numRows, err := writer.RowGroupNumRows() + require.NoError(t, err) + assert.Equal(t, 4, numRows) + require.NoError(t, writer.Close()) +} + +func TestFileWriterNumRows(t *testing.T) { + schema := arrow.NewSchema([]arrow.Field{ + {Name: "one", Nullable: true, Type: arrow.PrimitiveTypes.Float64}, + {Name: "two", Nullable: true, Type: arrow.PrimitiveTypes.Float64}, + }, nil) + + data := `[ + {"one": 1, "two": 2}, + {"one": 1, "two": null}, + {"one": null, "two": 2}, + {"one": null, "two": null} + ]` + record, _, err := array.RecordFromJSON(memory.DefaultAllocator, schema, strings.NewReader(data)) + require.NoError(t, err) + + maxRowGroupLength := 2 + + output := &bytes.Buffer{} + writerProps := parquet.NewWriterProperties(parquet.WithMaxRowGroupLength(int64(maxRowGroupLength))) + writer, err := pqarrow.NewFileWriter(schema, output, writerProps, pqarrow.DefaultWriterProps()) + require.NoError(t, err) + + require.NoError(t, writer.Write(record)) + rowGroupNumRows, err := writer.RowGroupNumRows() + require.NoError(t, err) + assert.Equal(t, maxRowGroupLength, rowGroupNumRows) + + require.NoError(t, writer.Close()) + assert.Equal(t, 4, writer.NumRows()) +} From 4b1f06327f05341b6e51293b3186d80cd5fdbf87 Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Fri, 8 Dec 2023 14:41:33 -0500 Subject: [PATCH 008/570] MINOR: [Java] Bump ch.qos.logback:logback-classic from 1.2.13 to 1.3.14 in /java (#39145) ### Rationale for this change Raised by dependabot, but dependabot didn't upgrade to the correct version for Arrow Java and did not upgrade the dependency. ### What changes are included in this PR? * logback 1.2.13 -> 1.3.14 * slf4j 1.7.25 -> 2.0.7 (required by logback 1.3.14) ### Are these changes tested? CI ### Are there any user-facing changes? 
No Authored-by: Dane Pitkin Signed-off-by: Sutou Kouhei --- java/memory/memory-netty/pom.xml | 2 +- java/pom.xml | 6 +++--- java/tools/pom.xml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/java/memory/memory-netty/pom.xml b/java/memory/memory-netty/pom.xml index 88a0436eb7175..e625cbeabc65a 100644 --- a/java/memory/memory-netty/pom.xml +++ b/java/memory/memory-netty/pom.xml @@ -41,7 +41,7 @@ ch.qos.logback logback-core - 1.2.13 + 1.3.14 test diff --git a/java/pom.xml b/java/pom.xml index cd16b862d10c0..cd26e79d47f3d 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -31,7 +31,7 @@ ${project.build.directory}/generated-sources 1.9.0 5.10.1 - 1.7.25 + 2.0.7 32.1.3-jre 4.1.100.Final 1.59.0 @@ -308,7 +308,7 @@ org.slf4j jcl-over-slf4j - 1.7.5 + ${dep.slf4j.version} @@ -698,7 +698,7 @@ ch.qos.logback logback-classic - 1.2.13 + 1.3.14 test diff --git a/java/tools/pom.xml b/java/tools/pom.xml index 1815c39227de9..8ea98a84b4ad1 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -47,7 +47,7 @@ ch.qos.logback logback-classic - 1.2.13 + 1.3.14 runtime From ad63158e74d903c263c51cd0207cf77f8aa12ede Mon Sep 17 00:00:00 2001 From: abandy Date: Fri, 8 Dec 2023 14:50:22 -0700 Subject: [PATCH 009/570] GH-37884: [Swift] allow reading of unaligned FlatBuffers buffers (#38635) The PR enables the swift readers to read from unaligned buffers (fix for issue: 37884) Enabling unaligned buffers incurs a performance penalty so the developer will need to consider this when enabling this feature. It is not currently possible to recover from a buffer unaligned error as this error is a fatalError so trying aligned and then falling back to unaligned is not an option. Also, FlatBuffers has a verifier that should be able to catch this error but currently it seems to fail on both aligned and unaligned buffers (I tried verifying the example python server get return value but verification fails even though the buffers are able to be read successfully) * Closes: #37884 Authored-by: Alva Bandy Signed-off-by: Sutou Kouhei --- swift/Arrow/Package.swift | 6 +++++- swift/Arrow/Sources/Arrow/ArrowReader.swift | 18 +++++++++++++----- .../Sources/ArrowFlight/FlightClient.swift | 11 +++++++++-- .../Sources/ArrowFlight/FlightServer.swift | 7 +++++++ .../ArrowFlight/RecordBatchStreamReader.swift | 11 +++++++++-- 5 files changed, 43 insertions(+), 10 deletions(-) diff --git a/swift/Arrow/Package.swift b/swift/Arrow/Package.swift index 065afe62640ea..946eb999c798a 100644 --- a/swift/Arrow/Package.swift +++ b/swift/Arrow/Package.swift @@ -32,7 +32,11 @@ let package = Package( targets: ["Arrow"]), ], dependencies: [ - .package(url: "https://github.com/google/flatbuffers.git", from: "23.3.3") + // The latest version of flatbuffers v23.5.26 was built in May 26, 2023 + // and therefore doesn't include the unaligned buffer swift changes. + // This can be changed back to using the tag once a new version of + // flatbuffers has been released. + .package(url: "https://github.com/google/flatbuffers.git", branch: "master") ], targets: [ // Targets are the basic building blocks of a package. A target can define a module or a test suite. 
diff --git a/swift/Arrow/Sources/Arrow/ArrowReader.swift b/swift/Arrow/Sources/Arrow/ArrowReader.swift index ef995b18052a8..d9dc1bdb470e6 100644 --- a/swift/Arrow/Sources/Arrow/ArrowReader.swift +++ b/swift/Arrow/Sources/Arrow/ArrowReader.swift @@ -132,7 +132,8 @@ public class ArrowReader { } public func fromStream( // swiftlint:disable:this function_body_length - _ fileData: Data + _ fileData: Data, + useUnalignedBuffers: Bool = false ) -> Result { let footerLength = fileData.withUnsafeBytes { rawBuffer in rawBuffer.loadUnaligned(fromByteOffset: fileData.count - 4, as: Int32.self) @@ -141,7 +142,9 @@ public class ArrowReader { let result = ArrowReaderResult() let footerStartOffset = fileData.count - Int(footerLength + 4) let footerData = fileData[footerStartOffset...] - let footerBuffer = ByteBuffer(data: footerData) + let footerBuffer = ByteBuffer( + data: footerData, + allowReadingUnalignedBuffers: useUnalignedBuffers) let footer = org_apache_arrow_flatbuf_Footer.getRootAsFooter(bb: footerBuffer) let schemaResult = loadSchema(footer.schema!) switch schemaResult { @@ -170,7 +173,9 @@ public class ArrowReader { let messageStartOffset = recordBatch.offset + (Int64(MemoryLayout.size) * messageOffset) let messageEndOffset = messageStartOffset + Int64(messageLength) let recordBatchData = fileData[messageStartOffset ..< messageEndOffset] - let mbb = ByteBuffer(data: recordBatchData) + let mbb = ByteBuffer( + data: recordBatchData, + allowReadingUnalignedBuffers: useUnalignedBuffers) let message = org_apache_arrow_flatbuf_Message.getRootAsMessage(bb: mbb) switch message.headerType { case .recordbatch: @@ -219,9 +224,12 @@ public class ArrowReader { public func fromMessage( _ dataHeader: Data, dataBody: Data, - result: ArrowReaderResult + result: ArrowReaderResult, + useUnalignedBuffers: Bool = false ) -> Result { - let mbb = ByteBuffer(data: dataHeader) + let mbb = ByteBuffer( + data: dataHeader, + allowReadingUnalignedBuffers: useUnalignedBuffers) let message = org_apache_arrow_flatbuf_Message.getRootAsMessage(bb: mbb) switch message.headerType { case .schema: diff --git a/swift/ArrowFlight/Sources/ArrowFlight/FlightClient.swift b/swift/ArrowFlight/Sources/ArrowFlight/FlightClient.swift index 7a572ceca5bd6..ef3e4fa239e84 100644 --- a/swift/ArrowFlight/Sources/ArrowFlight/FlightClient.swift +++ b/swift/ArrowFlight/Sources/ArrowFlight/FlightClient.swift @@ -24,8 +24,11 @@ import Arrow public class FlightClient { let client: Arrow_Flight_Protocol_FlightServiceAsyncClient - public init(channel: GRPCChannel) { + let allowReadingUnalignedBuffers: Bool + + public init(channel: GRPCChannel, allowReadingUnalignedBuffers: Bool = false ) { client = Arrow_Flight_Protocol_FlightServiceAsyncClient(channel: channel) + self.allowReadingUnalignedBuffers = allowReadingUnalignedBuffers } private func readMessages( @@ -34,7 +37,11 @@ public class FlightClient { let reader = ArrowReader() let arrowResult = ArrowReader.makeArrowReaderResult() for try await data in responseStream { - switch reader.fromMessage(data.dataHeader, dataBody: data.dataBody, result: arrowResult) { + switch reader.fromMessage( + data.dataHeader, + dataBody: data.dataBody, + result: arrowResult, + useUnalignedBuffers: allowReadingUnalignedBuffers) { case .success: continue case .failure(let error): diff --git a/swift/ArrowFlight/Sources/ArrowFlight/FlightServer.swift b/swift/ArrowFlight/Sources/ArrowFlight/FlightServer.swift index a34bf5c0acee9..19644d632e997 100644 --- a/swift/ArrowFlight/Sources/ArrowFlight/FlightServer.swift +++ 
b/swift/ArrowFlight/Sources/ArrowFlight/FlightServer.swift @@ -63,6 +63,7 @@ public func schemaFromMessage(_ schemaData: Data) -> ArrowSchema? { } public protocol ArrowFlightServer: Sendable { + var allowReadingUnalignedBuffers: Bool { get } func listFlights(_ criteria: FlightCriteria, writer: FlightInfoStreamWriter) async throws func getFlightInfo(_ request: FlightDescriptor) async throws -> FlightInfo func getSchema(_ request: FlightDescriptor) async throws -> ArrowFlight.FlightSchemaResult @@ -73,6 +74,12 @@ public protocol ArrowFlightServer: Sendable { func doExchange(_ reader: RecordBatchStreamReader, writer: RecordBatchStreamWriter) async throws } +extension ArrowFlightServer { + var allowReadingUnalignedBuffers: Bool { + return false + } +} + public func makeFlightServer(_ handler: ArrowFlightServer) -> CallHandlerProvider { return InternalFlightServer(handler) } diff --git a/swift/ArrowFlight/Sources/ArrowFlight/RecordBatchStreamReader.swift b/swift/ArrowFlight/Sources/ArrowFlight/RecordBatchStreamReader.swift index 972d19435ddfc..464752dbcbeea 100644 --- a/swift/ArrowFlight/Sources/ArrowFlight/RecordBatchStreamReader.swift +++ b/swift/ArrowFlight/Sources/ArrowFlight/RecordBatchStreamReader.swift @@ -27,10 +27,13 @@ public class RecordBatchStreamReader: AsyncSequence, AsyncIteratorProtocol { var descriptor: FlightDescriptor? var batchIndex = 0 var streamIterator: any AsyncIteratorProtocol + var useUnalignedBuffers: Bool let stream: GRPC.GRPCAsyncRequestStream - init(_ stream: GRPC.GRPCAsyncRequestStream) { + init(_ stream: GRPC.GRPCAsyncRequestStream, + useUnalignedBuffers: Bool = false) { self.stream = stream self.streamIterator = self.stream.makeAsyncIterator() + self.useUnalignedBuffers = useUnalignedBuffers } public func next() async throws -> (Arrow.RecordBatch?, FlightDescriptor?)? { @@ -55,7 +58,11 @@ public class RecordBatchStreamReader: AsyncSequence, AsyncIteratorProtocol { let dataBody = flightData.dataBody let dataHeader = flightData.dataHeader descriptor = FlightDescriptor(flightData.flightDescriptor) - switch reader.fromMessage(dataHeader, dataBody: dataBody, result: result) { + switch reader.fromMessage( + dataHeader, + dataBody: dataBody, + result: result, + useUnalignedBuffers: useUnalignedBuffers) { case .success(()): if result.batches.count > 0 { batches = result.batches From 8a644afc77ebe6333114e503cab29f9b0969618a Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 9 Dec 2023 07:01:31 +0900 Subject: [PATCH 010/570] GH-39136: [C++] Remove needless system Protobuf dependency with -DARROW_HDFS=ON (#39137) ### Rationale for this change Our HDFS related codes don't depend on Protobuf because we process HDFS via external `libhdfs.so` and it's `dlopen()`-ed. ### What changes are included in this PR? Remove a needless CMake configuration. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. 
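As a minimal illustration of the `dlopen()` point above, the pattern looks roughly like the sketch below. This is a standalone example, not Arrow's actual HDFS shim; `hdfsConnect` is a symbol name from the libhdfs C API, while the rest of the program is invented for illustration.

```cpp
// Standalone sketch of the dlopen()/dlsym() pattern; not Arrow's actual
// HDFS loader. Build with something like: g++ sketch.cc -ldl
#include <dlfcn.h>

#include <cstdio>

int main() {
  // The library is resolved at runtime, so nothing HDFS-related has to be
  // linked into the program at build time.
  void* handle = dlopen("libhdfs.so", RTLD_NOW | RTLD_LOCAL);
  if (handle == nullptr) {
    std::fprintf(stderr, "could not load libhdfs.so: %s\n", dlerror());
    return 1;
  }
  // Symbols are looked up by name, e.g. a libhdfs C API entry point.
  void* sym = dlsym(handle, "hdfsConnect");
  std::printf("hdfsConnect %s\n", sym != nullptr ? "resolved" : "not found");
  dlclose(handle);
  return 0;
}
```

Because the library and its symbols are only resolved at runtime, no HDFS (or Protobuf) libraries need to appear on libarrow's link line, which is why the CMake dependency could be dropped.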
* Closes: #39136 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- cpp/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index bcb298407bd8b..9f17350b2505a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -873,9 +873,6 @@ add_dependencies(arrow_test_dependencies toolchain-tests) if(ARROW_STATIC_LINK_LIBS) add_dependencies(arrow_dependencies ${ARROW_STATIC_LINK_LIBS}) if(ARROW_HDFS OR ARROW_ORC) - if(Protobuf_SOURCE STREQUAL "SYSTEM") - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_PROTOBUF_LIBPROTOBUF}) - endif() if(NOT MSVC_TOOLCHAIN) list(APPEND ARROW_STATIC_LINK_LIBS ${CMAKE_DL_LIBS}) list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${CMAKE_DL_LIBS}) From 31d2afc28a201bda78da8b0229e823413ff82e0d Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sat, 9 Dec 2023 16:39:51 +0100 Subject: [PATCH 011/570] GH-39126: [C++][CI] Fix Valgrind failures (#39127) ### Rationale for this change ### What changes are included in this PR? ### Are these changes tested? ### Are there any user-facing changes? * Closes: #39126 Lead-authored-by: Antoine Pitrou Co-authored-by: Antoine Pitrou Co-authored-by: Benjamin Kietzman Signed-off-by: Antoine Pitrou --- cpp/src/arrow/array/array_dict_test.cc | 2 +- cpp/src/arrow/array/array_test.cc | 1 + cpp/src/arrow/array/builder_binary.cc | 9 ++++---- cpp/src/arrow/array/builder_binary.h | 31 +++++++++++++++++--------- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/cpp/src/arrow/array/array_dict_test.cc b/cpp/src/arrow/array/array_dict_test.cc index 2f3ee6e2d49a5..a4c03b5db6371 100644 --- a/cpp/src/arrow/array/array_dict_test.cc +++ b/cpp/src/arrow/array/array_dict_test.cc @@ -1129,7 +1129,7 @@ TEST(TestDictionary, Validate) { arr = std::make_shared(dict_type, indices, MakeArray(invalid_data)); ASSERT_RAISES(Invalid, arr->ValidateFull()); -#if !defined(__APPLE__) +#if !defined(__APPLE__) && !defined(ARROW_VALGRIND) // GH-35712: ASSERT_DEATH would make testing slow on MacOS. 
ASSERT_DEATH( { diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index 974eb54d2caca..e9d478f108584 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -728,6 +728,7 @@ TEST_F(TestArray, TestMakeArrayFromScalar) { } for (auto scalar : scalars) { + ARROW_SCOPED_TRACE("scalar type: ", scalar->type->ToString()); AssertAppendScalar(pool_, scalar); } } diff --git a/cpp/src/arrow/array/builder_binary.cc b/cpp/src/arrow/array/builder_binary.cc index 3ff22d4a3feeb..f85852fa0eda6 100644 --- a/cpp/src/arrow/array/builder_binary.cc +++ b/cpp/src/arrow/array/builder_binary.cc @@ -80,10 +80,11 @@ Status BinaryViewBuilder::AppendArraySlice(const ArraySpan& array, int64_t offse Status BinaryViewBuilder::FinishInternal(std::shared_ptr* out) { ARROW_ASSIGN_OR_RAISE(auto null_bitmap, null_bitmap_builder_.FinishWithLength(length_)); ARROW_ASSIGN_OR_RAISE(auto data, data_builder_.FinishWithLength(length_)); - BufferVector buffers = {null_bitmap, data}; - for (auto&& buffer : data_heap_builder_.Finish()) { - buffers.push_back(std::move(buffer)); - } + ARROW_ASSIGN_OR_RAISE(auto byte_buffers, data_heap_builder_.Finish()); + BufferVector buffers(byte_buffers.size() + 2); + buffers[0] = std::move(null_bitmap); + buffers[1] = std::move(data); + std::move(byte_buffers.begin(), byte_buffers.end(), buffers.begin() + 2); *out = ArrayData::Make(type(), length_, std::move(buffers), null_count_); Reset(); return Status::OK(); diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h index 3e87cf2403610..d825f7d32520a 100644 --- a/cpp/src/arrow/array/builder_binary.h +++ b/cpp/src/arrow/array/builder_binary.h @@ -524,16 +524,11 @@ class ARROW_EXPORT StringHeapBuilder { "strings larger than 2GB"); } if (num_bytes > current_remaining_bytes_) { - // Ensure the buffer is fully overwritten to avoid leaking uninitialized - // bytes from the allocator - if (current_remaining_bytes_ > 0) { - std::memset(current_out_buffer_, 0, current_remaining_bytes_); - blocks_.back() = SliceBuffer(blocks_.back(), 0, - blocks_.back()->size() - current_remaining_bytes_); - } + ARROW_RETURN_NOT_OK(FinishLastBlock()); current_remaining_bytes_ = num_bytes > blocksize_ ? 
num_bytes : blocksize_; - ARROW_ASSIGN_OR_RAISE(std::shared_ptr new_block, - AllocateBuffer(current_remaining_bytes_, alignment_, pool_)); + ARROW_ASSIGN_OR_RAISE( + std::shared_ptr new_block, + AllocateResizableBuffer(current_remaining_bytes_, alignment_, pool_)); current_offset_ = 0; current_out_buffer_ = new_block->mutable_data(); blocks_.emplace_back(std::move(new_block)); @@ -550,7 +545,10 @@ class ARROW_EXPORT StringHeapBuilder { int64_t current_remaining_bytes() const { return current_remaining_bytes_; } - std::vector> Finish() { + Result>> Finish() { + if (!blocks_.empty()) { + ARROW_RETURN_NOT_OK(FinishLastBlock()); + } current_offset_ = 0; current_out_buffer_ = NULLPTR; current_remaining_bytes_ = 0; @@ -558,10 +556,21 @@ class ARROW_EXPORT StringHeapBuilder { } private: + Status FinishLastBlock() { + if (current_remaining_bytes_ > 0) { + // Avoid leaking uninitialized bytes from the allocator + ARROW_RETURN_NOT_OK( + blocks_.back()->Resize(blocks_.back()->size() - current_remaining_bytes_, + /*shrink_to_fit=*/true)); + blocks_.back()->ZeroPadding(); + } + return Status::OK(); + } + MemoryPool* pool_; int64_t alignment_; int64_t blocksize_ = kDefaultBlocksize; - std::vector> blocks_; + std::vector> blocks_; int32_t current_offset_ = 0; uint8_t* current_out_buffer_ = NULLPTR; From 20c975d03f8db85a0a3adea2e384b2291fb56da3 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sat, 9 Dec 2023 17:22:51 +0100 Subject: [PATCH 012/570] GH-39122: [C++][Parquet] Optimize FLBA record reader (#39124) ### Rationale for this change The FLBA implementation of RecordReader is suboptimal: * it doesn't preallocate the output array * it reads the decoded validity bitmap one bit at a time and recreates it, one bit at a time ### What changes are included in this PR? Optimize the FLBA implementation of RecordReader so as to avoid the aforementioned inefficiencies. I did a quick-and-dirty benchmark on a Parquet file with two columns: * column 1: uncompressed, PLAIN-encoded, FLBA<3> with no nulls * column 2: uncompressed, PLAIN-encoded, FLBA<3> with 25% nulls With git main, the file can be read at 465 MB/s. With this PR, the file can be read at 700 MB/s. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. 
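For context, a rough sketch of the kind of read the benchmark above performs, going through the public `parquet::arrow` reader. The file name is a placeholder and error handling is reduced to status propagation; fixed-size binary columns such as the FLBA<3> ones described are decoded by the record reader optimized in this change.

```cpp
// Rough sketch: read a Parquet file whose columns are FIXED_LEN_BYTE_ARRAY.
// "data.parquet" is a placeholder path.
#include <memory>

#include <arrow/api.h>
#include <arrow/io/file.h>
#include <parquet/arrow/reader.h>

arrow::Status ReadFixedLenByteArrayFile() {
  ARROW_ASSIGN_OR_RAISE(auto input, arrow::io::ReadableFile::Open("data.parquet"));
  std::unique_ptr<parquet::arrow::FileReader> reader;
  ARROW_RETURN_NOT_OK(
      parquet::arrow::OpenFile(input, arrow::default_memory_pool(), &reader));
  // ReadTable drives the per-column record readers, including the FLBA one.
  std::shared_ptr<arrow::Table> table;
  ARROW_RETURN_NOT_OK(reader->ReadTable(&table));
  return arrow::Status::OK();
}
```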
* Closes: #39122 Lead-authored-by: Antoine Pitrou Co-authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/parquet/column_reader.cc | 70 +++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 20 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index ecc48811e46fc..a49e58afbdb83 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -30,6 +30,7 @@ #include #include "arrow/array.h" +#include "arrow/array/array_binary.h" #include "arrow/array/builder_binary.h" #include "arrow/array/builder_dict.h" #include "arrow/array/builder_primitive.h" @@ -2040,23 +2041,29 @@ class TypedRecordReader : public TypedColumnReaderImpl, LevelInfo leaf_info_; }; -class FLBARecordReader : public TypedRecordReader, - virtual public BinaryRecordReader { +class FLBARecordReader final : public TypedRecordReader, + virtual public BinaryRecordReader { public: FLBARecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info, ::arrow::MemoryPool* pool, bool read_dense_for_nullable) : TypedRecordReader(descr, leaf_info, pool, read_dense_for_nullable), - builder_(nullptr) { + byte_width_(descr_->type_length()), + empty_(byte_width_, 0), + type_(::arrow::fixed_size_binary(byte_width_)), + null_bitmap_builder_(pool), + data_builder_(pool) { ARROW_DCHECK_EQ(descr_->physical_type(), Type::FIXED_LEN_BYTE_ARRAY); - int byte_width = descr_->type_length(); - std::shared_ptr<::arrow::DataType> type = ::arrow::fixed_size_binary(byte_width); - builder_ = std::make_unique<::arrow::FixedSizeBinaryBuilder>(type, this->pool_); } ::arrow::ArrayVector GetBuilderChunks() override { - std::shared_ptr<::arrow::Array> chunk; - PARQUET_THROW_NOT_OK(builder_->Finish(&chunk)); - return ::arrow::ArrayVector({chunk}); + const int64_t null_count = null_bitmap_builder_.false_count(); + const int64_t length = null_bitmap_builder_.length(); + ARROW_DCHECK_EQ(length * byte_width_, data_builder_.length()); + PARQUET_ASSIGN_OR_THROW(auto data_buffer, data_builder_.Finish()); + PARQUET_ASSIGN_OR_THROW(auto null_bitmap, null_bitmap_builder_.Finish()); + auto chunk = std::make_shared<::arrow::FixedSizeBinaryArray>( + type_, length, data_buffer, null_bitmap, null_count); + return ::arrow::ArrayVector({std::move(chunk)}); } void ReadValuesDense(int64_t values_to_read) override { @@ -2065,9 +2072,9 @@ class FLBARecordReader : public TypedRecordReader, this->current_decoder_->Decode(values, static_cast(values_to_read)); CheckNumberDecoded(num_decoded, values_to_read); - for (int64_t i = 0; i < num_decoded; i++) { - PARQUET_THROW_NOT_OK(builder_->Append(values[i].ptr)); - } + PARQUET_THROW_NOT_OK(null_bitmap_builder_.Reserve(num_decoded)); + PARQUET_THROW_NOT_OK(data_builder_.Reserve(num_decoded * byte_width_)); + UnsafeAppendDense(values, num_decoded); ResetValues(); } @@ -2081,22 +2088,45 @@ class FLBARecordReader : public TypedRecordReader, valid_bits, valid_bits_offset); ARROW_DCHECK_EQ(num_decoded, values_to_read); + PARQUET_THROW_NOT_OK(null_bitmap_builder_.Reserve(num_decoded)); + PARQUET_THROW_NOT_OK(data_builder_.Reserve(num_decoded * byte_width_)); + if (null_count == 0) { + UnsafeAppendDense(values, num_decoded); + } else { + UnsafeAppendSpaced(values, num_decoded, valid_bits, valid_bits_offset); + } + ResetValues(); + } + + void UnsafeAppendDense(const FLBA* values, int64_t num_decoded) { + null_bitmap_builder_.UnsafeAppend(num_decoded, /*value=*/true); + for (int64_t i = 0; i < num_decoded; i++) { + data_builder_.UnsafeAppend(values[i].ptr, 
byte_width_); + } + } + + void UnsafeAppendSpaced(const FLBA* values, int64_t num_decoded, + const uint8_t* valid_bits, int64_t valid_bits_offset) { + null_bitmap_builder_.UnsafeAppend(valid_bits, valid_bits_offset, num_decoded); for (int64_t i = 0; i < num_decoded; i++) { if (::arrow::bit_util::GetBit(valid_bits, valid_bits_offset + i)) { - PARQUET_THROW_NOT_OK(builder_->Append(values[i].ptr)); + data_builder_.UnsafeAppend(values[i].ptr, byte_width_); } else { - PARQUET_THROW_NOT_OK(builder_->AppendNull()); + data_builder_.UnsafeAppend(empty_.data(), byte_width_); } } - ResetValues(); } private: - std::unique_ptr<::arrow::FixedSizeBinaryBuilder> builder_; + const int byte_width_; + const std::vector empty_; + std::shared_ptr<::arrow::DataType> type_; + ::arrow::TypedBufferBuilder null_bitmap_builder_; + ::arrow::BufferBuilder data_builder_; }; -class ByteArrayChunkedRecordReader : public TypedRecordReader, - virtual public BinaryRecordReader { +class ByteArrayChunkedRecordReader final : public TypedRecordReader, + virtual public BinaryRecordReader { public: ByteArrayChunkedRecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info, ::arrow::MemoryPool* pool, bool read_dense_for_nullable) @@ -2137,8 +2167,8 @@ class ByteArrayChunkedRecordReader : public TypedRecordReader, typename EncodingTraits::Accumulator accumulator_; }; -class ByteArrayDictionaryRecordReader : public TypedRecordReader, - virtual public DictionaryRecordReader { +class ByteArrayDictionaryRecordReader final : public TypedRecordReader, + virtual public DictionaryRecordReader { public: ByteArrayDictionaryRecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info, ::arrow::MemoryPool* pool, bool read_dense_for_nullable) From 47f4d18ec5c42a9652d9f9bec18adb9cf5fb0e55 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Sat, 9 Dec 2023 21:15:20 -0300 Subject: [PATCH 013/570] GH-38702 [C++]: Implement AzureFileSystem::DeleteRootDirContents (#39151) ### Rationale for this change This copies the behavior implemented by S3FileSystem. ### What changes are included in this PR? An implementation of `DeleteRootDirContent` that prevents deletion of all blob containers. ### Are these changes tested? N/A. * Closes: #38702 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/filesystem/azurefs.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index daababb04c172..824a8fb531483 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -1427,7 +1427,7 @@ Status AzureFileSystem::DeleteDirContents(const std::string& path, bool missing_ } Status AzureFileSystem::DeleteRootDirContents() { - return Status::NotImplemented("The Azure FileSystem is not fully implemented"); + return Status::NotImplemented("Cannot delete all Azure Blob Storage containers"); } Status AzureFileSystem::DeleteFile(const std::string& path) { From cca5eec5fd853d4593dfe1b6c158e9543d32619f Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Sun, 10 Dec 2023 09:57:56 +0800 Subject: [PATCH 014/570] GH-39156: [C++][Compute] Fix negative duration division (#39158) ### Rationale for this change I forgot to cast durations to doubles in the current `division(duration, duration)` kernel. So they were essentially `reinterpret_cast`ed to double. Because I only tested small positive ints but not large ints or negative ints, I missed this bug. ### What changes are included in this PR? 
Add a `FloatingDivide` operator that casts ints to doubles and do floating division. Replace the `division(duration, duration)` with this op. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * Closes: #39156 Authored-by: Jin Shang Signed-off-by: Sutou Kouhei --- .../kernels/base_arithmetic_internal.h | 38 +++++++++++++++++++ .../compute/kernels/scalar_arithmetic.cc | 7 ++-- .../compute/kernels/scalar_temporal_test.cc | 8 ++-- 3 files changed, 46 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h index 7798c615777a4..d59320d270e4f 100644 --- a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h +++ b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h @@ -426,6 +426,44 @@ struct DivideChecked { } }; +struct FloatingDivide { + template + static enable_if_floating_value Call(KernelContext*, Arg0 left, Arg1 right, + Status*) { + return left / right; + } + + template + static enable_if_integer_value Call(KernelContext* ctx, Arg0 left, + Arg1 right, Status* st) { + static_assert(std::is_same::value); + return Call(ctx, static_cast(left), static_cast(right), st); + } + + // TODO: Add decimal +}; + +struct FloatingDivideChecked { + template + static enable_if_floating_value Call(KernelContext*, Arg0 left, Arg1 right, + Status* st) { + static_assert(std::is_same::value && std::is_same::value); + if (ARROW_PREDICT_FALSE(right == 0)) { + *st = Status::Invalid("divide by zero"); + return 0; + } + return left / right; + } + + template + static enable_if_integer_value Call(KernelContext* ctx, Arg0 left, + Arg1 right, Status* st) { + static_assert(std::is_same::value); + return Call(ctx, static_cast(left), static_cast(right), st); + } + // TODO: Add decimal +}; + struct Negate { template static constexpr enable_if_floating_value Call(KernelContext*, Arg arg, Status*) { diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc index c305028be19c9..ad33d7f8951f4 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc @@ -1513,7 +1513,8 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) { // Add divide(duration, duration) -> float64 for (auto unit : TimeUnit::values()) { - auto exec = ScalarBinaryNotNull::Exec; + auto exec = + ScalarBinaryNotNull::Exec; DCHECK_OK( divide->AddKernel({duration(unit), duration(unit)}, float64(), std::move(exec))); } @@ -1533,8 +1534,8 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) { // Add divide_checked(duration, duration) -> float64 for (auto unit : TimeUnit::values()) { - auto exec = - ScalarBinaryNotNull::Exec; + auto exec = ScalarBinaryNotNull::Exec; DCHECK_OK(divide_checked->AddKernel({duration(unit), duration(unit)}, float64(), std::move(exec))); } diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 4c7975add0308..d8bbe5ca8a34c 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -1722,12 +1722,12 @@ TEST_F(ScalarTemporalTest, TestTemporalDivideDuration) { } // div(duration, duration) -> float64 - auto left = ArrayFromJSON(duration(TimeUnit::SECOND), "[1, 2, 3, 4]"); - auto right = ArrayFromJSON(duration(TimeUnit::MILLI), "[4000, 300, 20, 1]"); + auto left = ArrayFromJSON(duration(TimeUnit::SECOND), "[1, 2, -3, 4]"); + auto right 
= ArrayFromJSON(duration(TimeUnit::MILLI), "[4000, -300, 20, 1]"); auto expected_left_by_right = - ArrayFromJSON(float64(), "[0.25, 6.666666666666667, 150, 4000]"); + ArrayFromJSON(float64(), "[0.25, -6.666666666666667, -150, 4000]"); auto expected_right_by_left = - ArrayFromJSON(float64(), "[4, 0.15, 0.006666666666666667, 0.00025]"); + ArrayFromJSON(float64(), "[4, -0.15, -0.006666666666666667, 0.00025]"); CheckScalarBinary("divide", left, right, expected_left_by_right); CheckScalarBinary("divide_checked", left, right, expected_left_by_right); CheckScalarBinary("divide", right, left, expected_right_by_left); From 4841cdaf9f336bdcbe31aff02ebc32e218ab84db Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Sun, 10 Dec 2023 07:16:18 -0500 Subject: [PATCH 015/570] GH-38979: [C++] Fix spelling (#38980) ### Rationale for this change ### What changes are included in this PR? Spelling fixes to cpp/src/ ### Are these changes tested? ### Are there any user-facing changes? * Closes: #38979 Authored-by: Josh Soref <2119212+jsoref@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- cpp/src/arrow/adapters/orc/adapter.h | 6 +++--- cpp/src/arrow/c/bridge_test.cc | 4 ++-- cpp/src/arrow/chunked_array_test.cc | 2 +- .../kernels/vector_selection_filter_internal.cc | 2 +- cpp/src/arrow/csv/lexing_internal.h | 2 +- cpp/src/arrow/csv/reader.cc | 2 +- cpp/src/arrow/csv/writer_benchmark.cc | 2 +- .../engine/substrait/extended_expression_internal.cc | 2 +- cpp/src/arrow/engine/substrait/extension_set.h | 2 +- cpp/src/arrow/engine/substrait/options.cc | 2 +- cpp/src/arrow/engine/substrait/serde_test.cc | 8 ++++---- cpp/src/arrow/engine/substrait/visibility.h | 2 +- cpp/src/arrow/extension/fixed_shape_tensor_test.cc | 4 ++-- cpp/src/arrow/field_ref_test.cc | 2 +- cpp/src/arrow/integration/json_integration.h | 2 +- cpp/src/arrow/io/file_benchmark.cc | 2 +- cpp/src/arrow/io/interfaces.h | 2 +- cpp/src/arrow/ipc/metadata_internal.cc | 2 +- cpp/src/arrow/ipc/read_write_test.cc | 10 +++++----- cpp/src/arrow/ipc/reader.h | 2 +- cpp/src/arrow/json/converter_test.cc | 2 +- cpp/src/arrow/json/reader.h | 2 +- cpp/src/arrow/table_test.cc | 6 +++--- cpp/src/arrow/testing/util.cc | 2 +- cpp/src/arrow/type_test.cc | 2 +- cpp/src/generated/Schema_generated.h | 2 +- cpp/src/skyhook/CMakeLists.txt | 2 +- cpp/src/skyhook/protocol/rados_protocol.h | 2 +- cpp/src/skyhook/protocol/skyhook_protocol.h | 2 +- 29 files changed, 42 insertions(+), 42 deletions(-) diff --git a/cpp/src/arrow/adapters/orc/adapter.h b/cpp/src/arrow/adapters/orc/adapter.h index 013be78600a8f..4ffff81f355f1 100644 --- a/cpp/src/arrow/adapters/orc/adapter.h +++ b/cpp/src/arrow/adapters/orc/adapter.h @@ -138,7 +138,7 @@ class ARROW_EXPORT ORCFileReader { /// \brief Get a stripe level record batch iterator. /// /// Each record batch will have up to `batch_size` rows. - /// NextStripeReader serves as a fine grained alternative to ReadStripe + /// NextStripeReader serves as a fine-grained alternative to ReadStripe /// which may cause OOM issues by loading the whole stripe into memory. /// /// Note this will only read rows for the current stripe, not the entire @@ -151,7 +151,7 @@ class ARROW_EXPORT ORCFileReader { /// \brief Get a stripe level record batch iterator. /// /// Each record batch will have up to `batch_size` rows. 
- /// NextStripeReader serves as a fine grained alternative to ReadStripe + /// NextStripeReader serves as a fine-grained alternative to ReadStripe /// which may cause OOM issues by loading the whole stripe into memory. /// /// Note this will only read rows for the current stripe, not the entire @@ -256,7 +256,7 @@ class ARROW_EXPORT ORCFileReader { int64_t GetFileLength(); /// \brief Get the serialized file tail. - /// Usefull if another reader of the same file wants to avoid re-reading + /// Useful if another reader of the same file wants to avoid re-reading /// the file tail. See ReadOptions.SetSerializedFileTail(). /// /// \return a string of bytes with the file tail diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index 326c67f5eceac..58bbc9282c204 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -3131,7 +3131,7 @@ TEST_F(TestArrayImport, RunEndEncodedWithOffset) { REEFromJSON(ree_type, "[-2.0, -2.0, -2.0, -2.0, 3.0, 3.0, 3.0]")); CheckImport(expected); - // Ofsset in parent + // Offset in parent FillPrimitive(AddChild(), 5, 0, 0, run_ends_buffers5); FillPrimitive(AddChild(), 5, 0, 0, primitive_buffers_no_nulls5); FillRunEndEncoded(5, 2); @@ -3383,7 +3383,7 @@ TEST_F(TestArrayImport, ListError) { } TEST_F(TestArrayImport, ListViewNoError) { - // Unlike with lists, importing a length-0 list-view with all buffers ommitted is + // Unlike with lists, importing a length-0 list-view with all buffers omitted is // not an error. List-views don't need an extra offset value, so an empty offsets // buffer is valid in this case. diff --git a/cpp/src/arrow/chunked_array_test.cc b/cpp/src/arrow/chunked_array_test.cc index 46dccaf3c6b86..6ca52ab46ca68 100644 --- a/cpp/src/arrow/chunked_array_test.cc +++ b/cpp/src/arrow/chunked_array_test.cc @@ -228,7 +228,7 @@ TEST_F(TestChunkedArray, Validate) { random::RandomArrayGenerator gen(0); - // Valid if non-empty and ommitted type + // Valid if non-empty and omitted type ArrayVector arrays = {gen.Int64(50, 0, 100, 0.1), gen.Int64(50, 0, 100, 0.1)}; auto chunks_with_no_type = std::make_shared(arrays, nullptr); ASSERT_OK(chunks_with_no_type->ValidateFull()); diff --git a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc index 4a5e579fb155e..a25b04ae4fa65 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc @@ -171,7 +171,7 @@ class PrimitiveFilterImpl { } if (out_arr->buffers[0] != nullptr) { - // May not be allocated if neither filter nor values contains nulls + // May be unallocated if neither filter nor values contain nulls out_is_valid_ = out_arr->buffers[0]->mutable_data(); } out_data_ = reinterpret_cast(out_arr->buffers[1]->mutable_data()); diff --git a/cpp/src/arrow/csv/lexing_internal.h b/cpp/src/arrow/csv/lexing_internal.h index 357c5716d5115..b1da12750ac58 100644 --- a/cpp/src/arrow/csv/lexing_internal.h +++ b/cpp/src/arrow/csv/lexing_internal.h @@ -71,7 +71,7 @@ class BaseBloomFilter { // For example 'b' (ASCII value 98) will set/test bit #34 in the filter. // If the bit is set in the filter, the given character *may* be part // of the matched characters. If the bit is unset in the filter, - // the the given character *cannot* be part of the matched characters. + // the given character *cannot* be part of the matched characters. 
FilterType CharFilter(uint8_t c) const { return static_cast(1) << (c & kCharMask); } diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc index 30fc0bc6aca44..332fad054fea3 100644 --- a/cpp/src/arrow/csv/reader.cc +++ b/cpp/src/arrow/csv/reader.cc @@ -389,7 +389,7 @@ namespace { // The parsed batch contains a list of offsets for each of the columns so that columns // can be individually scanned // -// This operator is not re-entrant +// This operator is not reentrant class BlockParsingOperator { public: BlockParsingOperator(io::IOContext io_context, ParseOptions parse_options, diff --git a/cpp/src/arrow/csv/writer_benchmark.cc b/cpp/src/arrow/csv/writer_benchmark.cc index 9bbba7ebd7e9f..54c0f50613754 100644 --- a/cpp/src/arrow/csv/writer_benchmark.cc +++ b/cpp/src/arrow/csv/writer_benchmark.cc @@ -109,7 +109,7 @@ void BenchmarkWriteCsv(benchmark::State& state, const WriteOptions& options, state.counters["null_percent"] = static_cast(state.range(0)); } -// Exercies UnQuotedColumnPopulator with integer +// Exercises UnQuotedColumnPopulator with integer void WriteCsvNumeric(benchmark::State& state) { auto batch = MakeIntTestBatch(kCsvRows, kCsvCols, state.range(0)); BenchmarkWriteCsv(state, WriteOptions::Defaults(), *batch); diff --git a/cpp/src/arrow/engine/substrait/extended_expression_internal.cc b/cpp/src/arrow/engine/substrait/extended_expression_internal.cc index a6401e1d0b36d..225901c910f25 100644 --- a/cpp/src/arrow/engine/substrait/extended_expression_internal.cc +++ b/cpp/src/arrow/engine/substrait/extended_expression_internal.cc @@ -85,7 +85,7 @@ Result ExpressionFromProto( // expression which is not redundant. // // For example, if the base schema is [struct, i32] and the expression is - // field(0) the the extended expression output names might be ["foo", "my_expression"]. + // field(0) the extended expression output names might be ["foo", "my_expression"]. // The "foo" is redundant but we can verify it matches and reject if it does not. // // The one exception is struct literals which have no field names. For example, if diff --git a/cpp/src/arrow/engine/substrait/extension_set.h b/cpp/src/arrow/engine/substrait/extension_set.h index d9c0af081a546..0a502960447e6 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.h +++ b/cpp/src/arrow/engine/substrait/extension_set.h @@ -86,7 +86,7 @@ struct ARROW_ENGINE_EXPORT IdHashEq { /// \brief Owning storage for ids /// /// Substrait plans may reuse URIs and names in many places. For convenience -/// and performance Substarit ids are typically passed around as views. As we +/// and performance Substrait ids are typically passed around as views. As we /// convert a plan from Substrait to Arrow we need to copy these strings out of /// the Substrait buffer and into owned storage. This class serves as that owned /// storage. 
diff --git a/cpp/src/arrow/engine/substrait/options.cc b/cpp/src/arrow/engine/substrait/options.cc index 481375076734f..f8e7173386583 100644 --- a/cpp/src/arrow/engine/substrait/options.cc +++ b/cpp/src/arrow/engine/substrait/options.cc @@ -81,7 +81,7 @@ class DefaultExtensionProvider : public BaseExtensionProvider { rel.UnpackTo(&seg_agg_rel); return MakeSegmentedAggregateRel(conv_opts, inputs, seg_agg_rel, ext_set); } - return Status::NotImplemented("Unrecognized extension in Susbstrait plan: ", + return Status::NotImplemented("Unrecognized extension in Substrait plan: ", rel.DebugString()); } diff --git a/cpp/src/arrow/engine/substrait/serde_test.cc b/cpp/src/arrow/engine/substrait/serde_test.cc index 2e72ae70edd88..1e771ccdd25c2 100644 --- a/cpp/src/arrow/engine/substrait/serde_test.cc +++ b/cpp/src/arrow/engine/substrait/serde_test.cc @@ -1334,7 +1334,7 @@ TEST(Substrait, GetRecordBatchReader) { ASSERT_OK_AND_ASSIGN(auto reader, ExecuteSerializedPlan(*buf)); ASSERT_OK_AND_ASSIGN(auto table, Table::FromRecordBatchReader(reader.get())); // Note: assuming the binary.parquet file contains fixed amount of records - // in case of a test failure, re-evalaute the content in the file + // in case of a test failure, re-evaluate the content in the file EXPECT_EQ(table->num_rows(), 12); }); } @@ -4223,7 +4223,7 @@ TEST(Substrait, ReadRelWithGlobFiles) { } }] })")); - // To avoid unnecessar metadata columns being included in the final result + // To avoid unnecessary metadata columns being included in the final result std::vector include_columns = {0, 1, 2}; compute::SortOptions options({compute::SortKey("A", compute::SortOrder::Ascending)}); CheckRoundTripResult(std::move(expected_table), buf, std::move(include_columns), @@ -6108,7 +6108,7 @@ TEST(Substrait, ExtendedExpressionSerialization) { TEST(Substrait, ExtendedExpressionInvalidPlans) { // The schema defines the type as {"x", "y"} but output_names has {"a", "y"} - constexpr std::string_view kBadOuptutNames = R"( + constexpr std::string_view kBadOutputNames = R"( { "referredExpr":[ { @@ -6159,7 +6159,7 @@ TEST(Substrait, ExtendedExpressionInvalidPlans) { )"; ASSERT_OK_AND_ASSIGN( - auto buf, internal::SubstraitFromJSON("ExtendedExpression", kBadOuptutNames)); + auto buf, internal::SubstraitFromJSON("ExtendedExpression", kBadOutputNames)); ASSERT_THAT(DeserializeExpressions(*buf), Raises(StatusCode::Invalid, testing::HasSubstr("Ambiguous plan"))); diff --git a/cpp/src/arrow/engine/substrait/visibility.h b/cpp/src/arrow/engine/substrait/visibility.h index cfd0db2747bba..d81d202ee6567 100644 --- a/cpp/src/arrow/engine/substrait/visibility.h +++ b/cpp/src/arrow/engine/substrait/visibility.h @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -// TODO(westonpace): Once we have a propert engine module this file +// TODO(westonpace): Once we have a proper engine module this file // should be renamed arrow/engine/visibility.h // This API is EXPERIMENTAL. 
diff --git a/cpp/src/arrow/extension/fixed_shape_tensor_test.cc b/cpp/src/arrow/extension/fixed_shape_tensor_test.cc index b8be1edc49e60..2b8e703d3c66e 100644 --- a/cpp/src/arrow/extension/fixed_shape_tensor_test.cc +++ b/cpp/src/arrow/extension/fixed_shape_tensor_test.cc @@ -194,7 +194,7 @@ TEST_F(TestExtensionType, MetadataSerializationRoundtrip) { "Invalid dim_names"); } -TEST_F(TestExtensionType, RoudtripBatch) { +TEST_F(TestExtensionType, RoundtripBatch) { auto exact_ext_type = internal::checked_pointer_cast(ext_type_); std::vector> buffers = {nullptr, Buffer::Wrap(values_)}; @@ -383,7 +383,7 @@ TEST_F(TestExtensionType, SliceTensor) { ASSERT_EQ(sliced->length(), partial->length()); } -TEST_F(TestExtensionType, RoudtripBatchFromTensor) { +TEST_F(TestExtensionType, RoundtripBatchFromTensor) { auto exact_ext_type = internal::checked_pointer_cast(ext_type_); ASSERT_OK_AND_ASSIGN(auto tensor, Tensor::Make(value_type_, Buffer::Wrap(values_), shape_, {}, {"n", "x", "y"})); diff --git a/cpp/src/arrow/field_ref_test.cc b/cpp/src/arrow/field_ref_test.cc index 10e2564ed1896..0cb2da4f709a1 100644 --- a/cpp/src/arrow/field_ref_test.cc +++ b/cpp/src/arrow/field_ref_test.cc @@ -135,7 +135,7 @@ struct FieldPathTestCase { out.schema = arrow::schema({out.v0.field, out.v1.field}); out.type = struct_(out.schema->fields()); - // Create null bitmaps for the struct fields independent of its childrens' + // Create null bitmaps for the struct fields independent of its children's // bitmaps. For FieldPath::GetFlattened, parent/child bitmaps should be combined // - for FieldPath::Get, higher-level nulls are ignored. auto bitmap1_1 = gen.NullBitmap(kNumRows, 0.15); diff --git a/cpp/src/arrow/integration/json_integration.h b/cpp/src/arrow/integration/json_integration.h index 0284ef6c89d97..13abfae095ab6 100644 --- a/cpp/src/arrow/integration/json_integration.h +++ b/cpp/src/arrow/integration/json_integration.h @@ -40,7 +40,7 @@ class ARROW_EXPORT IntegrationJsonWriter { /// \brief Create a new JSON writer that writes to memory /// /// \param[in] schema the schema of record batches - /// \return the creater writer object + /// \return the creator writer object static Result> Open( const std::shared_ptr& schema); diff --git a/cpp/src/arrow/io/file_benchmark.cc b/cpp/src/arrow/io/file_benchmark.cc index 7fd10a0a0e659..02ccfb6337f4b 100644 --- a/cpp/src/arrow/io/file_benchmark.cc +++ b/cpp/src/arrow/io/file_benchmark.cc @@ -220,7 +220,7 @@ static void BenchmarkStreamingWrites(benchmark::State& state, // Benchmark writing to /dev/null // -// This situation is irrealistic as the kernel likely doesn't +// This situation is unrealistic as the kernel likely doesn't // copy the data at all, so we only measure small writes. static void FileOutputStreamSmallWritesToNull( diff --git a/cpp/src/arrow/io/interfaces.h b/cpp/src/arrow/io/interfaces.h index d2a11b7b6d7ce..b36c38c6d4868 100644 --- a/cpp/src/arrow/io/interfaces.h +++ b/cpp/src/arrow/io/interfaces.h @@ -196,7 +196,7 @@ class ARROW_EXPORT Readable { /// EXPERIMENTAL: The IOContext associated with this file. /// /// By default, this is the same as default_io_context(), but it may be - /// overriden by subclasses. + /// overridden by subclasses. 
virtual const IOContext& io_context() const; }; diff --git a/cpp/src/arrow/ipc/metadata_internal.cc b/cpp/src/arrow/ipc/metadata_internal.cc index 4f41edf8e15db..4154b594d9507 100644 --- a/cpp/src/arrow/ipc/metadata_internal.cc +++ b/cpp/src/arrow/ipc/metadata_internal.cc @@ -1423,7 +1423,7 @@ Status GetSchema(const void* opaque_schema, DictionaryMemo* dictionary_memo, std::shared_ptr metadata; RETURN_NOT_OK(internal::GetKeyValueMetadata(schema->custom_metadata(), &metadata)); - // set endianess using the value in flatbuf schema + // set endianness using the value in flatbuf schema auto endianness = schema->endianness() == flatbuf::Endianness::Little ? Endianness::Little : Endianness::Big; diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc index 5c15cb912e4a7..17c4c5636d5b0 100644 --- a/cpp/src/arrow/ipc/read_write_test.cc +++ b/cpp/src/arrow/ipc/read_write_test.cc @@ -140,7 +140,7 @@ TEST_P(TestMessage, SerializeTo) { output_length); ASSERT_OK_AND_EQ(output_length, stream->Tell()); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Finish()); - // chech whether length is written in little endian + // check whether length is written in little endian auto buffer_ptr = buffer.get()->data(); ASSERT_EQ(output_length - body_length - prefix_size, bit_util::FromLittleEndian(*(uint32_t*)(buffer_ptr + 4))); @@ -363,7 +363,7 @@ TEST_F(TestSchemaMetadata, MetadataVersionForwardCompatibility) { std::string root; ASSERT_OK(GetTestResourceRoot(&root)); - // schema_v6.arrow with currently non-existent MetadataVersion::V6 + // schema_v6.arrow with currently nonexistent MetadataVersion::V6 std::stringstream schema_v6_path; schema_v6_path << root << "/forward-compatibility/schema_v6.arrow"; @@ -520,7 +520,7 @@ class IpcTestFixture : public io::MemoryMapFixture, public ExtensionTypesMixin { }; TEST(MetadataVersion, ForwardsCompatCheck) { - // Verify UBSAN is ok with casting out of range metdata version. + // Verify UBSAN is ok with casting out of range metadata version. EXPECT_LT(flatbuf::MetadataVersion::MAX, static_cast(72)); } @@ -3019,14 +3019,14 @@ TEST(TestRecordBatchFileReaderIo, SkipTheFieldInTheMiddle) { GetReadRecordBatchReadRanges({0, 2}, {1, 40}); } -TEST(TestRecordBatchFileReaderIo, ReadTwoContinousFields) { +TEST(TestRecordBatchFileReaderIo, ReadTwoContinuousFields) { // read the int32 field and the int64 field // + 5 int32: 5 * 4 bytes // + 5 int64: 5 * 8 bytes GetReadRecordBatchReadRanges({1, 2}, {20, 40}); } -TEST(TestRecordBatchFileReaderIo, ReadTwoContinousFieldsWithIoMerged) { +TEST(TestRecordBatchFileReaderIo, ReadTwoContinuousFieldsWithIoMerged) { // change the array length to 64 so that bool field and int32 are continuous without // padding // read the bool field and the int32 field since the bool field's aligned offset diff --git a/cpp/src/arrow/ipc/reader.h b/cpp/src/arrow/ipc/reader.h index de4606094049c..888f59a627771 100644 --- a/cpp/src/arrow/ipc/reader.h +++ b/cpp/src/arrow/ipc/reader.h @@ -258,7 +258,7 @@ class ARROW_EXPORT Listener { virtual Status OnEOS(); /// \brief Called when a record batch is decoded and - /// OnRecordBatchWithMetadataDecoded() isn't overrided. + /// OnRecordBatchWithMetadataDecoded() isn't overridden. /// /// The default implementation just returns /// arrow::Status::NotImplemented(). 
diff --git a/cpp/src/arrow/json/converter_test.cc b/cpp/src/arrow/json/converter_test.cc index cfc44c99976d5..fa85e704bc5e3 100644 --- a/cpp/src/arrow/json/converter_test.cc +++ b/cpp/src/arrow/json/converter_test.cc @@ -39,7 +39,7 @@ Result> Convert(std::shared_ptr type, return converted; } -// bool, null are trivial pass throughs +// bool, null are trivial pass-throughs TEST(ConverterTest, Integers) { for (auto int_type : {int8(), int16(), int32(), int64()}) { diff --git a/cpp/src/arrow/json/reader.h b/cpp/src/arrow/json/reader.h index 7776cb0b7d8a0..b7849a83ba1f8 100644 --- a/cpp/src/arrow/json/reader.h +++ b/cpp/src/arrow/json/reader.h @@ -79,7 +79,7 @@ class ARROW_EXPORT StreamingReader : public RecordBatchReader { /// threading is disabled, this will block until completion. virtual Future> ReadNextAsync() = 0; - /// Get the number of bytes which have been succesfully converted to record batches + /// Get the number of bytes which have been successfully converted to record batches /// and consumed [[nodiscard]] virtual int64_t bytes_processed() const = 0; diff --git a/cpp/src/arrow/table_test.cc b/cpp/src/arrow/table_test.cc index 3949caa402846..5f6905ce672d2 100644 --- a/cpp/src/arrow/table_test.cc +++ b/cpp/src/arrow/table_test.cc @@ -179,7 +179,7 @@ TEST_F(TestTable, Equals) { other = Table::Make(schema_, other_columns); ASSERT_FALSE(table_->Equals(*other)); - // Differring schema metadata + // Differing schema metadata other_schema = schema_->WithMetadata(::arrow::key_value_metadata({"key"}, {"value"})); other = Table::Make(other_schema, columns_); ASSERT_TRUE(table_->Equals(*other)); @@ -635,8 +635,8 @@ TEST_F(TestTable, SelectColumns) { ASSERT_OK_AND_ASSIGN(auto subset, table->SelectColumns({0, 2})); ASSERT_OK(subset->ValidateFull()); - auto expexted_schema = ::arrow::schema({schema_->field(0), schema_->field(2)}); - auto expected = Table::Make(expexted_schema, {table->column(0), table->column(2)}); + auto expected_schema = ::arrow::schema({schema_->field(0), schema_->field(2)}); + auto expected = Table::Make(expected_schema, {table->column(0), table->column(2)}); ASSERT_TRUE(subset->Equals(*expected)); // Out of bounds indices diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc index e8a782575e278..36351fa8595be 100644 --- a/cpp/src/arrow/testing/util.cc +++ b/cpp/src/arrow/testing/util.cc @@ -198,7 +198,7 @@ std::string GetListenAddress() { ss << "." << byte; } #else - // On MacOS, only 127.0.0.1 is a valid loopback address by default. + // On macOS, only 127.0.0.1 is a valid loopback address by default. 
ss << "127.0.0.1"; #endif // Append port number diff --git a/cpp/src/arrow/type_test.cc b/cpp/src/arrow/type_test.cc index 009e557f82f68..22913f77fbfc1 100644 --- a/cpp/src/arrow/type_test.cc +++ b/cpp/src/arrow/type_test.cc @@ -612,7 +612,7 @@ TEST_F(TestSchema, TestMetadataConstruction) { AssertSchemaEqual(schema2, schema1); AssertSchemaNotEqual(schema2, schema1, /*check_metadata=*/true); - // Field has different metatadata + // Field has different metadata AssertSchemaEqual(schema2, schema3); AssertSchemaNotEqual(schema2, schema3, /*check_metadata=*/true); diff --git a/cpp/src/generated/Schema_generated.h b/cpp/src/generated/Schema_generated.h index eeeeac68f0a45..12ee81e6743b5 100644 --- a/cpp/src/generated/Schema_generated.h +++ b/cpp/src/generated/Schema_generated.h @@ -1725,7 +1725,7 @@ inline ::flatbuffers::Offset + **/module-info.java dev/checkstyle/checkstyle.xml dev/checkstyle/checkstyle.license dev/checkstyle/suppressions.xml @@ -371,6 +372,24 @@ + + org.apache.arrow.maven.plugins + module-info-compiler-maven-plugin + + + default-compile + + compile + + + + default-testCompile + + testCompile + + + + @@ -400,6 +419,8 @@ maven-compiler-plugin ${maven-compiler-plugin.version} + **/module-info.java + **/module-info.java false @@ -546,6 +567,11 @@ + + org.apache.arrow.maven.plugins + module-info-compiler-maven-plugin + ${project.version} + @@ -735,6 +761,7 @@ + maven bom format memory @@ -1236,7 +1263,6 @@ - From 4aa9f604dfdab4c4b524a5b18c7976adb10c9b41 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Tue, 12 Dec 2023 17:07:56 -0400 Subject: [PATCH 038/570] GH-39185: [C++] Remove compiler warnings with `-Wconversion -Wno-sign-conversion` in public headers (#39186) ### Rationale for this change The R package has a warning from CRAN to fix a failure to compile with `-Wconversion -Wno-sign-conversion -Werror`. Some of these errors we control and can patch easily; however, the ones in the Arrow C++ portion are more difficult to work around (hence the separate PR). See #39138 for all reported errors (including those in just the R package). ### What changes are included in this PR? The requisite `static_cast<>()`s were added to silence the warnings. ### Are these changes tested? By existing tests. We may add a future R nightly job that runs with these warning flags. ### Are there any user-facing changes? 
No * Closes: #39185 Authored-by: Dewey Dunnington Signed-off-by: Dewey Dunnington --- cpp/src/arrow/util/bit_util.h | 9 ++++++--- cpp/src/arrow/util/bitmap_generate.h | 7 ++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 04ab07af1d779..1d3a1dc2459f9 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -335,7 +335,9 @@ void ClearBitmap(uint8_t* data, int64_t offset, int64_t length); /// ref: https://stackoverflow.com/a/59523400 template constexpr Word PrecedingWordBitmask(unsigned int const i) { - return (static_cast(i < sizeof(Word) * 8) << (i & (sizeof(Word) * 8 - 1))) - 1; + return static_cast(static_cast(i < sizeof(Word) * 8) + << (i & (sizeof(Word) * 8 - 1))) - + 1; } static_assert(PrecedingWordBitmask(0) == 0x00, ""); static_assert(PrecedingWordBitmask(4) == 0x0f, ""); @@ -357,8 +359,9 @@ constexpr Word SpliceWord(int n, Word low, Word high) { template void PackBits(const uint32_t* values, uint8_t* out) { for (int i = 0; i < batch_size / 8; ++i) { - *out++ = (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 | - values[4] << 4 | values[5] << 5 | values[6] << 6 | values[7] << 7); + *out++ = static_cast(values[0] | values[1] << 1 | values[2] << 2 | + values[3] << 3 | values[4] << 4 | values[5] << 5 | + values[6] << 6 | values[7] << 7); values += 8; } } diff --git a/cpp/src/arrow/util/bitmap_generate.h b/cpp/src/arrow/util/bitmap_generate.h index 5efc5d5a1d501..52a1e228e01f1 100644 --- a/cpp/src/arrow/util/bitmap_generate.h +++ b/cpp/src/arrow/util/bitmap_generate.h @@ -90,9 +90,10 @@ void GenerateBitsUnrolled(uint8_t* bitmap, int64_t start_offset, int64_t length, for (int i = 0; i < 8; ++i) { out_results[i] = g(); } - *cur++ = (out_results[0] | out_results[1] << 1 | out_results[2] << 2 | - out_results[3] << 3 | out_results[4] << 4 | out_results[5] << 5 | - out_results[6] << 6 | out_results[7] << 7); + *cur++ = static_cast(out_results[0] | out_results[1] << 1 | + out_results[2] << 2 | out_results[3] << 3 | + out_results[4] << 4 | out_results[5] << 5 | + out_results[6] << 6 | out_results[7] << 7); } int64_t remaining_bits = remaining % 8; From d2209582a0ef81c93342183cab3c12d69e79c5be Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 13 Dec 2023 10:15:22 -0500 Subject: [PATCH 039/570] MINOR: [JS] Fix typo in unmemoize comment (#39084) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Authored-by: Dominik Moritz Signed-off-by: Raúl Cumplido --- js/src/vector.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/src/vector.ts b/js/src/vector.ts index 8c9a3da66c92c..7e1caa343562c 100644 --- a/js/src/vector.ts +++ b/js/src/vector.ts @@ -324,7 +324,7 @@ export class Vector { * Returns a vector without memoization of the {@link get} method. If this * vector is not memoized, this method returns this vector. * - * @returns A a vector without memoization. + * @returns A new vector without memoization. */ public unmemoize(): Vector { if (DataType.isDictionary(this.type) && this.isMemoized) { From b5a46572cb6446d1c08ca1e111733b4861e8ddca Mon Sep 17 00:00:00 2001 From: mwish Date: Thu, 14 Dec 2023 01:28:02 +0800 Subject: [PATCH 040/570] GH-39208: [C++][Parquet] Remove deprecated AppendRowGroup(int64_t num_rows) (#39209) ### Rationale for this change Described in issue ### What changes are included in this PR? 
Remove the function below: ``` /// \note Deprecated since 1.3.0 RowGroupWriter* AppendRowGroup(int64_t num_rows); ``` ### Are these changes tested? no ### Are there any user-facing changes? no * Closes: #39208 Authored-by: mwish Signed-off-by: Antoine Pitrou --- cpp/src/parquet/file_writer.cc | 4 ---- cpp/src/parquet/file_writer.h | 12 ------------ 2 files changed, 16 deletions(-) diff --git a/cpp/src/parquet/file_writer.cc b/cpp/src/parquet/file_writer.cc index 5502e1f94a9d0..6f5610b934d81 100644 --- a/cpp/src/parquet/file_writer.cc +++ b/cpp/src/parquet/file_writer.cc @@ -642,10 +642,6 @@ RowGroupWriter* ParquetFileWriter::AppendBufferedRowGroup() { return contents_->AppendBufferedRowGroup(); } -RowGroupWriter* ParquetFileWriter::AppendRowGroup(int64_t num_rows) { - return AppendRowGroup(); -} - void ParquetFileWriter::AddKeyValueMetadata( const std::shared_ptr& key_value_metadata) { if (contents_) { diff --git a/cpp/src/parquet/file_writer.h b/cpp/src/parquet/file_writer.h index 3bda1e535cfa6..31706af86dbde 100644 --- a/cpp/src/parquet/file_writer.h +++ b/cpp/src/parquet/file_writer.h @@ -147,9 +147,6 @@ class PARQUET_EXPORT ParquetFileWriter { // Perform any cleanup associated with the file contents virtual void Close() = 0; - /// \note Deprecated since 1.3.0 - RowGroupWriter* AppendRowGroup(int64_t num_rows); - virtual RowGroupWriter* AppendRowGroup() = 0; virtual RowGroupWriter* AppendBufferedRowGroup() = 0; @@ -190,15 +187,6 @@ class PARQUET_EXPORT ParquetFileWriter { void Open(std::unique_ptr contents); void Close(); - // Construct a RowGroupWriter for the indicated number of rows. - // - // Ownership is solely within the ParquetFileWriter. The RowGroupWriter is only valid - // until the next call to AppendRowGroup or AppendBufferedRowGroup or Close. - // @param num_rows The number of rows that are stored in the new RowGroup - // - // \deprecated Since 1.3.0 - RowGroupWriter* AppendRowGroup(int64_t num_rows); - /// Construct a RowGroupWriter with an arbitrary number of rows. /// /// Ownership is solely within the ParquetFileWriter. The RowGroupWriter is only valid From 4142607f61a2e52fddaaee6e82a9e1be1d462cd9 Mon Sep 17 00:00:00 2001 From: mwish Date: Thu, 14 Dec 2023 01:28:21 +0800 Subject: [PATCH 041/570] GH-39210: [C++][Parquet] Avoid WriteRecordBatch from produce zero-sized RowGroup (#39211) ### Rationale for this change `WriteRecordBatch` might produce zero-sized row-group, which is mentioned in https://github.com/apache/arrow/issues/39210 . This patch avoid WriteRecordBatch from produce zero-sized RowGroup. ### What changes are included in this PR? adding a check for zero-sized row-group ### Are these changes tested? Yes ### Are there any user-facing changes? 
no * Closes: #39210 Lead-authored-by: mwish Co-authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- .../parquet/arrow/arrow_reader_writer_test.cc | 41 +++++++++++++++++++ cpp/src/parquet/arrow/writer.cc | 6 ++- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index a2f3498190f93..dd0b19c2ce048 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -5224,6 +5224,47 @@ TEST(TestArrowReadWrite, WriteAndReadRecordBatch) { EXPECT_TRUE(record_batch->Equals(*read_record_batch)); } +TEST(TestArrowReadWrite, WriteRecordBatchNotProduceEmptyRowGroup) { + // GH-39211: WriteRecordBatch should prevent from writing a empty row group + // in the end of the file. + auto pool = ::arrow::default_memory_pool(); + auto sink = CreateOutputStream(); + // Limit the max number of rows in a row group to 2 + auto writer_properties = WriterProperties::Builder().max_row_group_length(2)->build(); + auto arrow_writer_properties = default_arrow_writer_properties(); + + // Prepare schema + auto schema = ::arrow::schema({::arrow::field("a", ::arrow::int64())}); + std::shared_ptr parquet_schema; + ASSERT_OK_NO_THROW(ToParquetSchema(schema.get(), *writer_properties, + *arrow_writer_properties, &parquet_schema)); + auto schema_node = std::static_pointer_cast(parquet_schema->schema_root()); + + auto gen = ::arrow::random::RandomArrayGenerator(/*seed=*/42); + + // Create writer to write data via RecordBatch. + auto writer = ParquetFileWriter::Open(sink, schema_node, writer_properties); + std::unique_ptr arrow_writer; + ASSERT_OK(FileWriter::Make(pool, std::move(writer), schema, arrow_writer_properties, + &arrow_writer)); + // NewBufferedRowGroup() is not called explicitly and it will be called + // inside WriteRecordBatch(). + // Write 20 rows for two times + for (int i = 0; i < 2; ++i) { + auto record_batch = + gen.BatchOf({::arrow::field("a", ::arrow::int64())}, /*length=*/20); + ASSERT_OK_NO_THROW(arrow_writer->WriteRecordBatch(*record_batch)); + } + ASSERT_OK_NO_THROW(arrow_writer->Close()); + ASSERT_OK_AND_ASSIGN(auto buffer, sink->Finish()); + + auto file_metadata = arrow_writer->metadata(); + EXPECT_EQ(20, file_metadata->num_row_groups()); + for (int i = 0; i < 20; ++i) { + EXPECT_EQ(2, file_metadata->RowGroup(i)->num_rows()); + } +} + TEST(TestArrowReadWrite, MultithreadedWrite) { const int num_columns = 20; const int num_rows = 1000; diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc index 07c627d5eda67..5238986c428d3 100644 --- a/cpp/src/parquet/arrow/writer.cc +++ b/cpp/src/parquet/arrow/writer.cc @@ -419,6 +419,7 @@ class FileWriterImpl : public FileWriter { // Max number of rows allowed in a row group. const int64_t max_row_group_length = this->properties().max_row_group_length(); + // Initialize a new buffered row group writer if necessary. if (row_group_writer_ == nullptr || !row_group_writer_->buffered() || row_group_writer_->num_rows() >= max_row_group_length) { RETURN_NOT_OK(NewBufferedRowGroup()); @@ -461,8 +462,9 @@ class FileWriterImpl : public FileWriter { RETURN_NOT_OK(WriteBatch(offset, batch_size)); offset += batch_size; - // Flush current row group if it is full. - if (row_group_writer_->num_rows() >= max_row_group_length) { + // Flush current row group writer and create a new writer if it is full. 
+ if (row_group_writer_->num_rows() >= max_row_group_length && + offset < batch.num_rows()) { RETURN_NOT_OK(NewBufferedRowGroup()); } } From dbed728f840bdb84880708dda865ba4c985e95f9 Mon Sep 17 00:00:00 2001 From: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> Date: Wed, 13 Dec 2023 14:31:51 -0500 Subject: [PATCH 042/570] GH-39189: [Java] Bump com.h2database:h2 from 1.4.196 to 2.2.224 in /java (#39188) ### Rationale for this change Dependabot flagged this upgrade, but it requires test code changes. H2 is an in-memory database used for JDBC testing and 2.0 had several backwards-breaking changes: https://h2database.com/html/migration-to-v2.html ### What changes are included in this PR? * h2database upgraded from 1.4.196 -> 2.2.224 * H2 changed VARCHAR description from `VARCHAR` to `CHARACTER VARYING` * To query all tables/columns in H2, use `null` values for catalog and schema parameters instead of `%` * H2 now returns Binary and Blob data as a byte array instead of hex values * H2 added the type `VARBINARY`. `Binary` must now be a fixed length and is padded with zeroes. * H2 `CHAR` is fixed length and pads with whitespace now * H2 enforces all `ARRAY`s must be typed * H2 changed the literal syntax for arrays to be `ARRAY[val1, val2, ...]` from `(val1, val2, ...)` * H2 handles unicode chars natively now * H2 connections' `createArrayOf` API handles null values differently now ### Are these changes tested? Unit tests. ### Are there any user-facing changes? No, only tests updated. * Closes: #39189 Authored-by: Dane Pitkin Signed-off-by: David Li --- java/adapter/jdbc/pom.xml | 2 +- .../jdbc/JdbcToArrowCommentMetadataTest.java | 6 ++--- .../adapter/jdbc/JdbcToArrowTestHelper.java | 12 +--------- .../org/apache/arrow/adapter/jdbc/Table.java | 23 ++---------------- .../adapter/jdbc/h2/JdbcToArrowArrayTest.java | 21 ++++++++++------ .../resources/h2/test1_all_datatypes_h2.yml | 24 +++++++++---------- .../h2/test1_all_datatypes_null_h2.yml | 4 ++-- ...t1_all_datatypes_selected_null_rows_h2.yml | 8 +++---- .../src/test/resources/h2/test1_binary_h2.yml | 2 +- .../src/test/resources/h2/test1_char_h2.yml | 2 +- .../test/resources/h2/test1_charset_ch_h2.yml | 2 +- .../test/resources/h2/test1_charset_h2.yml | 2 +- .../test/resources/h2/test1_charset_jp_h2.yml | 2 +- .../test/resources/h2/test1_charset_kr_h2.yml | 2 +- .../src/test/resources/h2/test1_list_h2.yml | 22 ++++++++--------- .../h2/test1_selected_datatypes_null_h2.yml | 4 ++-- java/performance/pom.xml | 2 +- 17 files changed, 59 insertions(+), 81 deletions(-) diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index 2490f708e6f24..f95956d1f61d5 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -51,7 +51,7 @@ com.h2database h2 - 1.4.196 + 2.2.224 test diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowCommentMetadataTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowCommentMetadataTest.java index dc52210d6c7ab..07cab0d829fed 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowCommentMetadataTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowCommentMetadataTest.java @@ -127,7 +127,7 @@ public void schemaCommentWithDatabaseMetadata() throws Exception { "SQL_SCHEMA_NAME", "PUBLIC", "SQL_TABLE_NAME", "TABLE1", "SQL_COLUMN_NAME", "NAME", - "SQL_TYPE", "VARCHAR", + "SQL_TYPE", "CHARACTER VARYING", "comment", "Name of record")), field("COLUMN1", true, 
Types.MinorType.BIT.getType(), metadata( @@ -205,7 +205,7 @@ private String getTableComment(DatabaseMetaData metaData, String tableName) thro } String comment = null; int rowCount = 0; - try (ResultSet tableMetadata = metaData.getTables("%", "%", tableName, null)) { + try (ResultSet tableMetadata = metaData.getTables(null, null, tableName, null)) { if (tableMetadata.next()) { comment = tableMetadata.getString("REMARKS"); rowCount++; @@ -221,7 +221,7 @@ private String getTableComment(DatabaseMetaData metaData, String tableName) thro } private String getColumnComment(DatabaseMetaData metaData, String tableName, String columnName) throws SQLException { - try (ResultSet tableMetadata = metaData.getColumns("%", "%", tableName, columnName)) { + try (ResultSet tableMetadata = metaData.getColumns(null, null, tableName, columnName)) { if (tableMetadata.next()) { return tableMetadata.getString("REMARKS"); } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java index d5f896ba7df56..91f2f465dd989 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java @@ -334,16 +334,6 @@ public static void assertFieldMetadataMatchesResultSetMetadata(ResultSetMetaData } } - public static byte[] hexStringToByteArray(String s) { - int len = s.length(); - byte[] data = new byte[len / 2]; - for (int i = 0; i < len; i += 2) { - data[i / 2] = (byte) ((Character.digit(s.charAt(i), 16) << 4) + - Character.digit(s.charAt(i + 1), 16)); - } - return data; - } - public static Integer[] getIntValues(String[] values, String dataType) { String[] dataArr = getValues(values, dataType); Integer[] valueArr = new Integer[dataArr.length]; @@ -429,7 +419,7 @@ public static byte[][] getBinaryValues(String[] values, String dataType) { byte[][] valueArr = new byte[dataArr.length][]; int i = 0; for (String data : dataArr) { - valueArr[i++] = "null".equals(data.trim()) ? null : hexStringToByteArray(data.trim()); + valueArr[i++] = "null".equals(data.trim()) ? 
null : data.trim().getBytes(); } return valueArr; } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java index 87d5765b5d4b4..50c4fe6db2a14 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java @@ -129,7 +129,7 @@ public Float[] getFloatValues() { } public byte[][] getBinaryValues() { - return getHexToByteArray(values); + return getByteArray(values); } public byte[][] getVarCharValues() { @@ -137,7 +137,7 @@ public byte[][] getVarCharValues() { } public byte[][] getBlobValues() { - return getBinaryValues(); + return getByteArray(values); } public byte[][] getClobValues() { @@ -221,23 +221,4 @@ static byte[][] getByteArray(String[] data) { } return byteArr; } - - static byte[][] getHexToByteArray(String[] data) { - byte[][] byteArr = new byte[data.length][]; - - for (int i = 0; i < data.length; i++) { - byteArr[i] = hexStringToByteArray(data[i]); - } - return byteArr; - } - - static byte[] hexStringToByteArray(String s) { - int len = s.length(); - byte[] data = new byte[len / 2]; - for (int i = 0; i < len; i += 2) { - data[i / 2] = (byte) ((Character.digit(s.charAt(i), 16) << 4) + - Character.digit(s.charAt(i + 1), 16)); - } - return data; - } } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java index b7dc1ee58a5ba..377e332b43a13 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java @@ -53,7 +53,8 @@ public class JdbcToArrowArrayTest { private Connection conn = null; private static final String CREATE_STATEMENT = - "CREATE TABLE array_table (id INTEGER, int_array ARRAY, float_array ARRAY, string_array ARRAY);"; + "CREATE TABLE array_table (id INTEGER, int_array INTEGER ARRAY, float_array REAL ARRAY, " + + "string_array VARCHAR ARRAY);"; private static final String INSERT_STATEMENT = "INSERT INTO array_table (id, int_array, float_array, string_array) VALUES (?, ?, ?, ?);"; private static final String QUERY = "SELECT int_array, float_array, string_array FROM array_table ORDER BY id;"; @@ -354,9 +355,9 @@ private void insertRows( Float[] floatArray = floatArrays[i]; String[] strArray = strArrays[i]; - Array intArray = conn.createArrayOf("INT", integerArray); - Array realArray = conn.createArrayOf("REAL", floatArray); - Array varcharArray = conn.createArrayOf("VARCHAR", strArray); + Array intArray = integerArray != null ? conn.createArrayOf("INT", integerArray) : null; + Array realArray = floatArray != null ? conn.createArrayOf("REAL", floatArray) : null; + Array varcharArray = strArray != null ? 
conn.createArrayOf("VARCHAR", strArray) : null; // Insert Arrays of 4 Values in Each Row stmt.setInt(1, i); @@ -366,9 +367,15 @@ private void insertRows( stmt.executeUpdate(); - intArray.free(); - realArray.free(); - varcharArray.free(); + if (intArray != null) { + intArray.free(); + } + if (realArray != null) { + realArray.free(); + } + if (varcharArray != null) { + varcharArray.free(); + } } } } diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_h2.yml index ff76acf8d7cfb..c4f0017095df0 100644 --- a/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_h2.yml +++ b/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_h2.yml @@ -13,59 +13,59 @@ name: 'test1_all_datatypes_h2' create: 'CREATE TABLE table1 (int_field1 INT, bool_field2 BOOLEAN, tinyint_field3 TINYINT, smallint_field4 SMALLINT, bigint_field5 BIGINT, decimal_field6 DECIMAL(20,2), double_field7 DOUBLE, real_field8 REAL, time_field9 TIME, date_field10 DATE, timestamp_field11 TIMESTAMP, - binary_field12 BINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(16), bit_field17 BIT, - null_field18 NULL, list_field19 ARRAY, map_field20 VARCHAR(256));' + binary_field12 VARBINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(14), bit_field17 BIT, + null_field18 NULL, list_field19 INT ARRAY, map_field20 VARCHAR(256));' data: - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, (1, 2, 3), ''{"a":"b","key":"12345"}'');' + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[1, 2, 3], ''{"a":"b","key":"12345"}'');' - 'INSERT INTO table1 VALUES (102, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, (1, 2),''{"c":"d"}'');' + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[1, 2],''{"c":"d"}'');' - 'INSERT INTO table1 VALUES (103, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), 
''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, (1),''{"e":"f"}'');' + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[1],''{"e":"f"}'');' - 'INSERT INTO table1 VALUES (104, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, (2, 3, 4),''{"g":"h"}'');' + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[2, 3, 4],''{"g":"h"}'');' - 'INSERT INTO table1 VALUES (null, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, (2, 3),''{"i":"j"}'');' + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[2, 3],''{"i":"j"}'');' - 'INSERT INTO table1 VALUES (null, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, (2),''{"k":"l"}'');' + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[2],''{"k":"l"}'');' - 'INSERT INTO table1 VALUES (107, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - 
''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, (3, 4, 5),''{"m":"n"}'');' + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[3, 4, 5],''{"m":"n"}'');' - 'INSERT INTO table1 VALUES (108, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, (3, 4),''{"o":"p"}'');' + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[3, 4],''{"o":"p"}'');' - 'INSERT INTO table1 VALUES (109, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, (3),''{"q":"r"}'');' + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[3],''{"q":"r"}'');' - 'INSERT INTO table1 VALUES (110, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', - ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, (),''{"s":"t"}'');' + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null, ARRAY[],''{"s":"t"}'');' query: 'select int_field1, bool_field2, tinyint_field3, smallint_field4, bigint_field5, decimal_field6, double_field7, real_field8, time_field9, date_field10, timestamp_field11, binary_field12, varchar_field13, blob_field14, clob_field15, char_field16, bit_field17, null_field18, list_field19, map_field20 from table1' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_null_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_null_h2.yml index e1b1a1adcbb70..9be76229dab82 100644 --- a/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_null_h2.yml +++ 
b/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_null_h2.yml @@ -38,8 +38,8 @@ rowCount: '5' create: 'CREATE TABLE table1 (int_field1 INT, bool_field2 BOOLEAN, tinyint_field3 TINYINT, smallint_field4 SMALLINT, bigint_field5 BIGINT, decimal_field6 DECIMAL(20,2), double_field7 DOUBLE, real_field8 REAL, time_field9 TIME, date_field10 DATE, timestamp_field11 TIMESTAMP, - binary_field12 BINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(16), bit_field17 BIT, - list_field19 ARRAY,map_field20 VARCHAR(256));' + binary_field12 VARBINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(14), bit_field17 BIT, + list_field19 INT ARRAY, map_field20 VARCHAR(256));' data: - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_selected_null_rows_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_selected_null_rows_h2.yml index 0521ce2f9c30d..fda31da150775 100644 --- a/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_selected_null_rows_h2.yml +++ b/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_selected_null_rows_h2.yml @@ -36,8 +36,8 @@ vectors: create: 'CREATE TABLE table1 (int_field1 INT, bool_field2 BOOLEAN, tinyint_field3 TINYINT, smallint_field4 SMALLINT, bigint_field5 BIGINT, decimal_field6 DECIMAL(20,2), double_field7 DOUBLE, real_field8 REAL, time_field9 TIME, date_field10 DATE, timestamp_field11 TIMESTAMP, - binary_field12 BINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(16), bit_field17 BIT, - list_field19 ARRAY, map_field20 VARCHAR(256));' + binary_field12 VARBINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(14), bit_field17 BIT, + list_field19 INT ARRAY, map_field20 VARCHAR(256));' data: - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' @@ -46,7 +46,7 @@ data: PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', - 1, (1, 2, 3),''{"a":"b"}'');' + 1, ARRAY[1, 2, 3],''{"a":"b"}'');' - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' @@ -54,7 +54,7 @@ data: PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', - 1, (1, 2, 3),''{"c":"d"}'');' + 1, ARRAY[1, 2, 3],''{"c":"d"}'');' - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' diff --git 
a/java/adapter/jdbc/src/test/resources/h2/test1_binary_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_binary_h2.yml index ed94a7a189135..3d7b1ec658ef7 100644 --- a/java/adapter/jdbc/src/test/resources/h2/test1_binary_h2.yml +++ b/java/adapter/jdbc/src/test/resources/h2/test1_binary_h2.yml @@ -15,7 +15,7 @@ type: 'binary' vector: 'BINARY_FIELD12' -create: 'CREATE TABLE table1 (binary_field12 BINARY(100));' +create: 'CREATE TABLE table1 (binary_field12 VARBINARY(100));' data: - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_char_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_char_h2.yml index 018fe46c3ed53..588df7bff4df6 100644 --- a/java/adapter/jdbc/src/test/resources/h2/test1_char_h2.yml +++ b/java/adapter/jdbc/src/test/resources/h2/test1_char_h2.yml @@ -15,7 +15,7 @@ type: 'char' vector: 'CHAR_FIELD16' -create: 'CREATE TABLE table1 (char_field16 CHAR(16));' +create: 'CREATE TABLE table1 (char_field16 CHAR(14));' data: - 'INSERT INTO table1 VALUES (''some char text'');' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_charset_ch_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_charset_ch_h2.yml index 1a82fa60a0b97..2e60a4af5a970 100644 --- a/java/adapter/jdbc/src/test/resources/h2/test1_charset_ch_h2.yml +++ b/java/adapter/jdbc/src/test/resources/h2/test1_charset_ch_h2.yml @@ -22,7 +22,7 @@ rowCount: '5' charSet: 'GBK' -create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(128));' +create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(13));' data: - 'INSERT INTO table1 VALUES (101,''一些帶有char編碼的文本需要轉換為varchar'', ''一些带有char编码的文本需要转换为clob'', ''一些char编码的字符文本'');' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_charset_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_charset_h2.yml index 42f088e18d931..383681e5b3b41 100644 --- a/java/adapter/jdbc/src/test/resources/h2/test1_charset_h2.yml +++ b/java/adapter/jdbc/src/test/resources/h2/test1_charset_h2.yml @@ -20,7 +20,7 @@ vectors: rowCount: '10' -create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(128));' +create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(33));' data: - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_charset_jp_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_charset_jp_h2.yml index 2eae4019277ea..9b3cf9a18fe01 100644 --- a/java/adapter/jdbc/src/test/resources/h2/test1_charset_jp_h2.yml +++ b/java/adapter/jdbc/src/test/resources/h2/test1_charset_jp_h2.yml @@ -22,7 +22,7 @@ rowCount: '5' charSet: 'SJIS' -create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(128));' +create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(23));' data: - 'INSERT INTO table1 VALUES (101,''varcharに変換する必要があるcharエンコーディングのテキスト'', ''charエンコーディングのあるテキストをclobに変換する必要がある'', ''charエンコーディングのあるcharテキスト'');' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_charset_kr_h2.yml 
b/java/adapter/jdbc/src/test/resources/h2/test1_charset_kr_h2.yml index c6b6ee0551a36..d6e051c094fbe 100644 --- a/java/adapter/jdbc/src/test/resources/h2/test1_charset_kr_h2.yml +++ b/java/adapter/jdbc/src/test/resources/h2/test1_charset_kr_h2.yml @@ -22,7 +22,7 @@ rowCount: '5' charSet: 'EUC-KR' -create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(128));' +create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(22));' data: - 'INSERT INTO table1 VALUES (101,''char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.'', ''clob로 변환해야하는 char 인코딩을 가진 텍스트'', ''char 인코딩을 사용한 char 텍스트'');' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_list_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_list_h2.yml index 1314c49bf70fa..044c22182af58 100644 --- a/java/adapter/jdbc/src/test/resources/h2/test1_list_h2.yml +++ b/java/adapter/jdbc/src/test/resources/h2/test1_list_h2.yml @@ -15,19 +15,19 @@ type: 'list' vector: 'LIST_FIELD19' -create: 'CREATE TABLE table1 (list_field19 ARRAY);' +create: 'CREATE TABLE table1 (list_field19 INT ARRAY);' data: - - 'INSERT INTO table1 VALUES ((1,2,3));' - - 'INSERT INTO table1 VALUES ((1,2,3));' - - 'INSERT INTO table1 VALUES ((1,2,3));' - - 'INSERT INTO table1 VALUES ((1,2,3));' - - 'INSERT INTO table1 VALUES ((1,2,3));' - - 'INSERT INTO table1 VALUES ((1,2,3));' - - 'INSERT INTO table1 VALUES ((1,2,3));' - - 'INSERT INTO table1 VALUES ((1,2,3));' - - 'INSERT INTO table1 VALUES ((1,2,3));' - - 'INSERT INTO table1 VALUES ((1,2,3));' + - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' + - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' + - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' + - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' + - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' + - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' + - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' + - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' + - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' + - 'INSERT INTO table1 VALUES (ARRAY[1, 2, 3]);' query: 'select list_field19 from table1;' diff --git a/java/adapter/jdbc/src/test/resources/h2/test1_selected_datatypes_null_h2.yml b/java/adapter/jdbc/src/test/resources/h2/test1_selected_datatypes_null_h2.yml index e8d1d5de02c63..60a4462272c7f 100644 --- a/java/adapter/jdbc/src/test/resources/h2/test1_selected_datatypes_null_h2.yml +++ b/java/adapter/jdbc/src/test/resources/h2/test1_selected_datatypes_null_h2.yml @@ -34,8 +34,8 @@ rowCount: '5' create: 'CREATE TABLE table1 (int_field1 INT, bool_field2 BOOLEAN, tinyint_field3 TINYINT, smallint_field4 SMALLINT, bigint_field5 BIGINT, decimal_field6 DECIMAL(20,2), double_field7 DOUBLE, real_field8 REAL, time_field9 TIME, date_field10 DATE, timestamp_field11 TIMESTAMP, - binary_field12 BINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(16), bit_field17 BIT, - list_field19 ARRAY, map_field20 VARCHAR(256));' + binary_field12 VARBINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(14), bit_field17 BIT, + list_field19 INT ARRAY, map_field20 VARCHAR(256));' data: - 'INSERT INTO table1 (int_field1, bool_field2, tinyint_field3, smallint_field4) VALUES (102, 0, 46, 12001);' diff --git a/java/performance/pom.xml b/java/performance/pom.xml index 269ac72d83326..a3e4da85b4321 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -64,7 +64,7 @@ com.h2database h2 - 1.4.196 + 2.2.224 test From 
50cc141310f5ebb10d018f8e6416fa92ec28a91b Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Thu, 14 Dec 2023 17:35:31 +0900 Subject: [PATCH 043/570] MINOR: [C++] Use Cast() instead of CastTo() for Timestamp Scalar in test (#39226) ### Rationale for this change Remove legacy code This is a sub-PR of the PR mentioned below. * #39060 ### What changes are included in this PR? * Replace the legacy scalar `CastTo` implementation for Timestamp Scalar in test. It was supposed to be resolved in the mentioned PR, but it was missed. ### Are these changes tested? Yes. It is passed by existing test cases. ### Are there any user-facing changes? No. Authored-by: Hyunseok Seo Signed-off-by: Sutou Kouhei --- cpp/src/arrow/scalar_test.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/scalar_test.cc b/cpp/src/arrow/scalar_test.cc index 9d40e688f1dfb..ac740f92c8527 100644 --- a/cpp/src/arrow/scalar_test.cc +++ b/cpp/src/arrow/scalar_test.cc @@ -878,17 +878,17 @@ TEST(TestTimestampScalars, Cast) { EXPECT_EQ(convert(TimeUnit::MICRO, TimeUnit::MILLI, 4567), 4); ASSERT_OK_AND_ASSIGN(auto str, - TimestampScalar(1024, timestamp(TimeUnit::MILLI)).CastTo(utf8())); - EXPECT_EQ(*str, StringScalar("1970-01-01 00:00:01.024")); + Cast(TimestampScalar(1024, timestamp(TimeUnit::MILLI)), utf8())); + EXPECT_EQ(*str.scalar(), StringScalar("1970-01-01 00:00:01.024")); ASSERT_OK_AND_ASSIGN(auto i64, - TimestampScalar(1024, timestamp(TimeUnit::MILLI)).CastTo(int64())); - EXPECT_EQ(*i64, Int64Scalar(1024)); + Cast(TimestampScalar(1024, timestamp(TimeUnit::MILLI)), int64())); + EXPECT_EQ(*i64.scalar(), Int64Scalar(1024)); constexpr int64_t kMillisecondsInDay = 86400000; - ASSERT_OK_AND_ASSIGN( - auto d64, TimestampScalar(1024 * kMillisecondsInDay + 3, timestamp(TimeUnit::MILLI)) - .CastTo(date64())); - EXPECT_EQ(*d64, Date64Scalar(1024 * kMillisecondsInDay)); + ASSERT_OK_AND_ASSIGN(auto d64, Cast(TimestampScalar(1024 * kMillisecondsInDay + 3, + timestamp(TimeUnit::MILLI)), + date64())); + EXPECT_EQ(*d64.scalar(), Date64Scalar(1024 * kMillisecondsInDay)); } TEST(TestDurationScalars, Basics) { From 3236c129d1cbe3f73359278d1459a3f20e5c4df0 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Thu, 14 Dec 2023 14:12:17 +0000 Subject: [PATCH 044/570] GH-36441: [Python] Make `CacheOptions` configurable from Python (#36627) ### Rationale for this change Resolves: https://github.com/apache/arrow/issues/36441 ### What changes are included in this PR? - Add python bindings for `CacheOptions` from the C++ side. - Allow setting `cache_options` on `ParquetFragmentScanOptions` from the python side. - Adjust some of the comments on `CacheOptions` ### Are these changes tested? Yes. I added python side tests for these newly available configs similar to other configs. I have not added an integration test that ensures setting the configs on the python side leads to correctly using them on the C++ side. ### Are there any user-facing changes? Yes. The are new configs available on the python side but the defaults are unchanged. I've added/updated docstrings where relevant. 
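As a rough illustration of the user-facing surface this change adds (a sketch based only on the bindings shown in the diff below; the option values are arbitrary and not recommendations):

    import pyarrow as pa
    import pyarrow.dataset as ds

    # Tune read coalescing explicitly instead of relying on the (unchanged) defaults.
    cache_opts = pa.CacheOptions(hole_size_limit=2**10, range_size_limit=8 * 2**10, lazy=True)
    scan_opts = ds.ParquetFragmentScanOptions(pre_buffer=True, cache_options=cache_opts)

    # The options round-trip through the scan options and support equality checks.
    assert scan_opts.cache_options == cache_opts
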
* Closes: #36441 Lead-authored-by: Thomas Newton Co-authored-by: Joris Van den Bossche Signed-off-by: Joris Van den Bossche --- cpp/src/arrow/io/caching.h | 10 +- python/pyarrow/__init__.py | 2 +- python/pyarrow/_dataset_parquet.pyx | 21 ++++- python/pyarrow/_parquet.pxd | 6 +- python/pyarrow/includes/libarrow.pxd | 16 ++++ python/pyarrow/io.pxi | 134 +++++++++++++++++++++++++++ python/pyarrow/lib.pxd | 12 +++ python/pyarrow/tests/test_dataset.py | 28 ++++-- python/pyarrow/tests/test_io.py | 59 ++++++++++++ 9 files changed, 271 insertions(+), 17 deletions(-) diff --git a/cpp/src/arrow/io/caching.h b/cpp/src/arrow/io/caching.h index 9c1b8fe88b3bd..e2b911fafdbbc 100644 --- a/cpp/src/arrow/io/caching.h +++ b/cpp/src/arrow/io/caching.h @@ -42,6 +42,11 @@ struct ARROW_EXPORT CacheOptions { /// size greater than this, they are not combined int64_t range_size_limit; /// \brief A lazy cache does not perform any I/O until requested. + /// lazy = false: request all byte ranges when PreBuffer or WillNeed is called. + /// lazy = True, prefetch_limit = 0: request merged byte ranges only after the reader + /// needs them. + /// lazy = True, prefetch_limit = k: prefetch up to k merged byte ranges ahead of the + /// range that is currently being read. bool lazy; /// \brief The maximum number of ranges to be prefetched. This is only used /// for lazy cache to asynchronously read some ranges after reading the target range. @@ -56,9 +61,10 @@ struct ARROW_EXPORT CacheOptions { /// \brief Construct CacheOptions from network storage metrics (e.g. S3). /// /// \param[in] time_to_first_byte_millis Seek-time or Time-To-First-Byte (TTFB) in - /// milliseconds, also called call setup latency of a new S3 request. + /// milliseconds, also called call setup latency of a new read request. /// The value is a positive integer. - /// \param[in] transfer_bandwidth_mib_per_sec Data transfer Bandwidth (BW) in MiB/sec. + /// \param[in] transfer_bandwidth_mib_per_sec Data transfer Bandwidth (BW) in MiB/sec + /// (per connection). /// The value is a positive integer. /// \param[in] ideal_bandwidth_utilization_frac Transfer bandwidth utilization fraction /// (per connection) to maximize the net data load. diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index cd66abcb44840..9da94885ec6b2 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -243,7 +243,7 @@ def print_entry(label, value): # I/O from pyarrow.lib import (NativeFile, PythonFile, - BufferedInputStream, BufferedOutputStream, + BufferedInputStream, BufferedOutputStream, CacheOptions, CompressedInputStream, CompressedOutputStream, TransformInputStream, transcoding_input_stream, FixedSizeBufferWriter, diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index d458ac4ee710d..61e051f56cfb0 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -42,6 +42,7 @@ from pyarrow._dataset cimport ( FileWriteOptions, Fragment, FragmentScanOptions, + CacheOptions, Partitioning, PartitioningFactory, WrittenFile @@ -693,6 +694,10 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): parallel using a background I/O thread pool. Set to False if you want to prioritize minimal memory usage over maximum speed. + cache_options : pyarrow.CacheOptions, default None + Cache options used when pre_buffer is enabled. The default values should + be good for most use cases. You may want to adjust these for example if + you have exceptionally high latency to the file system. 
thrift_string_size_limit : int, default None If not None, override the maximum total string size allocated when decoding Thrift structures. The default limit should be @@ -714,6 +719,7 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): def __init__(self, *, bint use_buffered_stream=False, buffer_size=8192, bint pre_buffer=True, + cache_options=None, thrift_string_size_limit=None, thrift_container_size_limit=None, decryption_config=None, @@ -723,6 +729,8 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): self.use_buffered_stream = use_buffered_stream self.buffer_size = buffer_size self.pre_buffer = pre_buffer + if cache_options is not None: + self.cache_options = cache_options if thrift_string_size_limit is not None: self.thrift_string_size_limit = thrift_string_size_limit if thrift_container_size_limit is not None: @@ -770,6 +778,14 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): def pre_buffer(self, bint pre_buffer): self.arrow_reader_properties().set_pre_buffer(pre_buffer) + @property + def cache_options(self): + return CacheOptions.wrap(self.arrow_reader_properties().cache_options()) + + @cache_options.setter + def cache_options(self, CacheOptions options): + self.arrow_reader_properties().set_cache_options(options.unwrap()) + @property def thrift_string_size_limit(self): return self.reader_properties().thrift_string_size_limit() @@ -828,11 +844,11 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): bool """ attrs = ( - self.use_buffered_stream, self.buffer_size, self.pre_buffer, + self.use_buffered_stream, self.buffer_size, self.pre_buffer, self.cache_options, self.thrift_string_size_limit, self.thrift_container_size_limit, self.page_checksum_verification) other_attrs = ( - other.use_buffered_stream, other.buffer_size, other.pre_buffer, + other.use_buffered_stream, other.buffer_size, other.pre_buffer, other.cache_options, other.thrift_string_size_limit, other.thrift_container_size_limit, other.page_checksum_verification) return attrs == other_attrs @@ -849,6 +865,7 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): use_buffered_stream=self.use_buffered_stream, buffer_size=self.buffer_size, pre_buffer=self.pre_buffer, + cache_options=self.cache_options, thrift_string_size_limit=self.thrift_string_size_limit, thrift_container_size_limit=self.thrift_container_size_limit, page_checksum_verification=self.page_checksum_verification diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd index 59b50ceda8c40..7ce747e0aa46d 100644 --- a/python/pyarrow/_parquet.pxd +++ b/python/pyarrow/_parquet.pxd @@ -21,8 +21,8 @@ from pyarrow.includes.common cimport * from pyarrow.includes.libarrow cimport (CChunkedArray, CScalar, CSchema, CStatus, CTable, CMemoryPool, CBuffer, - CKeyValueMetadata, - CRandomAccessFile, COutputStream, + CKeyValueMetadata, CRandomAccessFile, + COutputStream, CCacheOptions, TimeUnit, CRecordBatchReader) from pyarrow.lib cimport _Weakrefable @@ -393,6 +393,8 @@ cdef extern from "parquet/api/reader.h" namespace "parquet" nogil: int64_t batch_size() void set_pre_buffer(c_bool pre_buffer) c_bool pre_buffer() const + void set_cache_options(CCacheOptions options) + CCacheOptions cache_options() const void set_coerce_int96_timestamp_unit(TimeUnit unit) TimeUnit coerce_int96_timestamp_unit() const diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index b0b89f8614f18..403846a38f3fd 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd 
@@ -1347,6 +1347,22 @@ cdef extern from "arrow/io/api.h" namespace "arrow::io" nogil: CStatus Write(const uint8_t* data, int64_t nbytes) CStatus Flush() + cdef cppclass CCacheOptions "arrow::io::CacheOptions": + int64_t hole_size_limit + int64_t range_size_limit + c_bool lazy + int64_t prefetch_limit + c_bool Equals "operator==" (CCacheOptions other) + + @staticmethod + CCacheOptions MakeFromNetworkMetrics(int64_t time_to_first_byte_millis, + int64_t transfer_bandwidth_mib_per_sec, + double ideal_bandwidth_utilization_frac, + int64_t max_ideal_request_size_mib) + + @staticmethod + CCacheOptions LazyDefaults() + cdef cppclass COutputStream" arrow::io::OutputStream"(FileInterface, Writable): pass diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 6f3916640199a..1897e76efc2a0 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -2122,6 +2122,140 @@ cdef CCompressionType _ensure_compression(str name) except *: raise ValueError('Invalid value for compression: {!r}'.format(name)) +cdef class CacheOptions(_Weakrefable): + """ + Cache options for a pre-buffered fragment scan. + + Parameters + ---------- + hole_size_limit : int, default 8KiB + The maximum distance in bytes between two consecutive ranges; beyond + this value, ranges are not combined. + range_size_limit : int, default 32MiB + The maximum size in bytes of a combined range; if combining two + consecutive ranges would produce a range of a size greater than this, + they are not combined + lazy : bool, default True + lazy = false: request all byte ranges when PreBuffer or WillNeed is called. + lazy = True, prefetch_limit = 0: request merged byte ranges only after the reader + needs them. + lazy = True, prefetch_limit = k: prefetch up to k merged byte ranges ahead of the + range that is currently being read. + prefetch_limit : int, default 0 + The maximum number of ranges to be prefetched. This is only used for + lazy cache to asynchronously read some ranges after reading the target + range. 
+ """ + + def __init__(self, *, hole_size_limit=None, range_size_limit=None, lazy=None, prefetch_limit=None): + self.wrapped = CCacheOptions.LazyDefaults() + if hole_size_limit is not None: + self.hole_size_limit = hole_size_limit + if range_size_limit is not None: + self.range_size_limit = range_size_limit + if lazy is not None: + self.lazy = lazy + if prefetch_limit is not None: + self.prefetch_limit = prefetch_limit + + cdef void init(self, CCacheOptions options): + self.wrapped = options + + cdef inline CCacheOptions unwrap(self): + return self.wrapped + + @staticmethod + cdef wrap(CCacheOptions options): + self = CacheOptions() + self.init(options) + return self + + @property + def hole_size_limit(self): + return self.wrapped.hole_size_limit + + @hole_size_limit.setter + def hole_size_limit(self, hole_size_limit): + self.wrapped.hole_size_limit = hole_size_limit + + @property + def range_size_limit(self): + return self.wrapped.range_size_limit + + @range_size_limit.setter + def range_size_limit(self, range_size_limit): + self.wrapped.range_size_limit = range_size_limit + + @property + def lazy(self): + return self.wrapped.lazy + + @lazy.setter + def lazy(self, lazy): + self.wrapped.lazy = lazy + + @property + def prefetch_limit(self): + return self.wrapped.prefetch_limit + + @prefetch_limit.setter + def prefetch_limit(self, prefetch_limit): + self.wrapped.prefetch_limit = prefetch_limit + + def __eq__(self, CacheOptions other): + try: + return self.unwrap().Equals(other.unwrap()) + except TypeError: + return False + + @staticmethod + def from_network_metrics(time_to_first_byte_millis, transfer_bandwidth_mib_per_sec, + ideal_bandwidth_utilization_frac=0.9, max_ideal_request_size_mib=64): + """ + Create suiteable CacheOptions based on provided network metrics. + + Typically this will be used with object storage solutions like Amazon S3, + Google Cloud Storage and Azure Blob Storage. + + Parameters + ---------- + time_to_first_byte_millis : int + Seek-time or Time-To-First-Byte (TTFB) in milliseconds, also called call + setup latency of a new read request. The value is a positive integer. + transfer_bandwidth_mib_per_sec : int + Data transfer Bandwidth (BW) in MiB/sec (per connection). The value is a positive + integer. + ideal_bandwidth_utilization_frac : int, default 0.9 + Transfer bandwidth utilization fraction (per connection) to maximize the net + data load. The value is a positive float less than 1. + max_ideal_request_size_mib : int, default 64 + The maximum single data request size (in MiB) to maximize the net data load. + + Returns + ------- + CacheOptions + """ + return CacheOptions.wrap(CCacheOptions.MakeFromNetworkMetrics( + time_to_first_byte_millis, transfer_bandwidth_mib_per_sec, + ideal_bandwidth_utilization_frac, max_ideal_request_size_mib)) + + @staticmethod + @binding(True) # Required for Cython < 3 + def _reconstruct(kwargs): + # __reduce__ doesn't allow passing named arguments directly to the + # reconstructor, hence this wrapper. + return CacheOptions(**kwargs) + + def __reduce__(self): + kwargs = dict( + hole_size_limit=self.hole_size_limit, + range_size_limit=self.range_size_limit, + lazy=self.lazy, + prefetch_limit=self.prefetch_limit, + ) + return CacheOptions._reconstruct, (kwargs,) + + cdef class Codec(_Weakrefable): """ Compression codec. 
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd index 1440ba0750094..58ec34addbc0a 100644 --- a/python/pyarrow/lib.pxd +++ b/python/pyarrow/lib.pxd @@ -561,6 +561,18 @@ cdef class RecordBatchReader(_Weakrefable): SharedPtrNoGIL[CRecordBatchReader] reader +cdef class CacheOptions(_Weakrefable): + cdef: + CCacheOptions wrapped + + cdef void init(self, CCacheOptions options) + + cdef inline CCacheOptions unwrap(self) + + @staticmethod + cdef wrap(const CCacheOptions options) + + cdef class Codec(_Weakrefable): cdef: shared_ptr[CCodec] wrapped diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index f3c25ee8c5c3b..a37eb1e426f7a 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -16,17 +16,16 @@ # under the License. import contextlib -import os -import posixpath import datetime +import os import pathlib +import posixpath import sys -import textwrap import tempfile +import textwrap import threading import time from shutil import copytree - from urllib.parse import quote import numpy as np @@ -35,12 +34,12 @@ import pyarrow as pa import pyarrow.compute as pc import pyarrow.csv -import pyarrow.json import pyarrow.feather import pyarrow.fs as fs -from pyarrow.tests.util import (change_cwd, _filesystem_uri, - FSProtocolClass, ProxyHandler, - _configure_s3_limited_user) +import pyarrow.json +from pyarrow.tests.util import (FSProtocolClass, ProxyHandler, + _configure_s3_limited_user, _filesystem_uri, + change_cwd) try: import pandas as pd @@ -138,7 +137,8 @@ def mockfs(): @pytest.fixture def open_logging_fs(monkeypatch): - from pyarrow.fs import PyFileSystem, LocalFileSystem + from pyarrow.fs import LocalFileSystem, PyFileSystem + from .test_fs import ProxyHandler localfs = LocalFileSystem() @@ -791,6 +791,9 @@ def test_parquet_scan_options(): thrift_container_size_limit=987654,) opts6 = ds.ParquetFragmentScanOptions( page_checksum_verification=True) + cache_opts = pa.CacheOptions( + hole_size_limit=2**10, range_size_limit=8*2**10, lazy=True) + opts7 = ds.ParquetFragmentScanOptions(pre_buffer=True, cache_options=cache_opts) assert opts1.use_buffered_stream is False assert opts1.buffer_size == 2**13 @@ -816,12 +819,17 @@ def test_parquet_scan_options(): assert opts6.page_checksum_verification is True + assert opts7.pre_buffer is True + assert opts7.cache_options == cache_opts + assert opts7.cache_options != opts1.cache_options + assert opts1 == opts1 assert opts1 != opts2 assert opts2 != opts3 assert opts3 != opts4 assert opts5 != opts1 assert opts6 != opts1 + assert opts7 != opts1 def test_file_format_pickling(pickle_module): @@ -2711,7 +2719,7 @@ def test_open_dataset_from_uri_s3_fsspec(s3_example_simple): table, path, _, _, host, port, access_key, secret_key = s3_example_simple s3fs = pytest.importorskip("s3fs") - from pyarrow.fs import PyFileSystem, FSSpecHandler + from pyarrow.fs import FSSpecHandler, PyFileSystem fs = s3fs.S3FileSystem( key=access_key, diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index 071962af290fc..5a495aa80abdf 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -664,6 +664,65 @@ def test_allocate_buffer_resizable(): assert buf.size == 200 +def test_cache_options(): + opts1 = pa.CacheOptions() + opts2 = pa.CacheOptions(hole_size_limit=1024) + opts3 = pa.CacheOptions(hole_size_limit=4096, range_size_limit=8192) + opts4 = pa.CacheOptions(hole_size_limit=4096, + range_size_limit=8192, prefetch_limit=5) + opts5 = 
pa.CacheOptions(hole_size_limit=4096, + range_size_limit=8192, lazy=False) + opts6 = pa.CacheOptions.from_network_metrics(time_to_first_byte_millis=100, + transfer_bandwidth_mib_per_sec=200, + ideal_bandwidth_utilization_frac=0.9, + max_ideal_request_size_mib=64) + + assert opts1.hole_size_limit == 8192 + assert opts1.range_size_limit == 32 * 1024 * 1024 + assert opts1.lazy is True + assert opts1.prefetch_limit == 0 + + assert opts2.hole_size_limit == 1024 + assert opts2.range_size_limit == 32 * 1024 * 1024 + assert opts2.lazy is True + assert opts2.prefetch_limit == 0 + + assert opts3.hole_size_limit == 4096 + assert opts3.range_size_limit == 8192 + assert opts3.lazy is True + assert opts3.prefetch_limit == 0 + + assert opts4.hole_size_limit == 4096 + assert opts4.range_size_limit == 8192 + assert opts4.lazy is True + assert opts4.prefetch_limit == 5 + + assert opts5.hole_size_limit == 4096 + assert opts5.range_size_limit == 8192 + assert opts5.lazy is False + assert opts5.prefetch_limit == 0 + + assert opts6.lazy is False + + assert opts1 == opts1 + assert opts1 != opts2 + assert opts2 != opts3 + assert opts3 != opts4 + assert opts4 != opts5 + assert opts6 != opts1 + + +def test_cache_options_pickling(pickle_module): + options = [ + pa.CacheOptions(), + pa.CacheOptions(hole_size_limit=4096, range_size_limit=8192, + lazy=True, prefetch_limit=5), + ] + + for option in options: + assert pickle_module.loads(pickle_module.dumps(option)) == option + + @pytest.mark.parametrize("compression", [ pytest.param( "bz2", marks=pytest.mark.xfail(raises=pa.lib.ArrowNotImplementedError) From cf1b265f4481f1a42ce8362db82c377fb659a363 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Thu, 14 Dec 2023 09:36:48 -0500 Subject: [PATCH 045/570] GH-38930: [Java] Fix spelling (#38931) ### Rationale for this change ### What changes are included in this PR? Spelling fixes to java/ ### Are these changes tested? ### Are there any user-facing changes? 
* Closes: #38930 Authored-by: Josh Soref <2119212+jsoref@users.noreply.github.com> Signed-off-by: David Li --- .../java/org/apache/arrow/AvroToArrow.java | 12 +++++------ .../adapter/jdbc/ArrowVectorIterator.java | 2 +- .../arrow/adapter/jdbc/JdbcFieldInfo.java | 2 +- .../arrow/adapter/jdbc/JdbcToArrow.java | 4 ++-- .../arrow/adapter/jdbc/JdbcToArrowConfig.java | 2 +- .../adapter/jdbc/AbstractJdbcToArrowTest.java | 20 +++++++++---------- .../adapter/jdbc/UnreliableMetaDataTest.java | 2 +- .../adapter/jdbc/h2/JdbcAliasToArrowTest.java | 2 +- .../main/java/org/apache/arrow/c/Data.java | 2 +- .../java/org/apache/arrow/c/NativeUtil.java | 4 ++-- .../arrow/vector/StructVectorLoader.java | 2 +- .../org/apache/arrow/c/DictionaryTest.java | 2 +- .../compression/TestCompressionCodec.java | 2 +- java/dataset/src/main/cpp/jni_util.cc | 6 +++--- java/dataset/src/main/cpp/jni_wrapper.cc | 4 ++-- .../apache/arrow/dataset/jni/JniWrapper.java | 2 +- .../arrow/dataset/substrait/JniWrapper.java | 2 +- .../arrow/flight/OutboundStreamListener.java | 2 +- .../flight/auth2/ClientHandshakeWrapper.java | 2 +- .../driver/jdbc/ArrowDatabaseMetadata.java | 10 +++++----- .../client/ArrowFlightSqlClientHandler.java | 6 +++--- .../jdbc/utils/AvaticaParameterBinder.java | 2 +- .../utils/VectorSchemaRootTransformer.java | 4 ++-- .../jdbc/ArrowFlightJdbcDriverTest.java | 6 +++--- .../jdbc/utils/MockFlightSqlProducer.java | 2 +- .../flight/sql/FlightSqlColumnMetadata.java | 8 ++++---- .../arrow/flight/sql/util/TableRef.java | 2 +- .../arrow/gandiva/evaluator/JniLoader.java | 6 +++--- .../arrow/gandiva/expression/InNode.java | 8 ++++---- .../arrow/memory/AllocationReservation.java | 2 +- .../org/apache/arrow/memory/ArrowBuf.java | 2 +- .../main/codegen/templates/BaseWriter.java | 4 ++++ .../codegen/templates/DenseUnionVector.java | 2 +- .../arrow/vector/BaseFixedWidthVector.java | 4 ++-- .../vector/BaseLargeVariableWidthVector.java | 2 +- .../arrow/vector/BaseVariableWidthVector.java | 2 +- .../apache/arrow/vector/Decimal256Vector.java | 2 +- .../apache/arrow/vector/DecimalVector.java | 4 ++-- .../arrow/vector/ExtensionTypeVector.java | 2 +- .../vector/IntervalMonthDayNanoVector.java | 2 +- .../org/apache/arrow/vector/VectorLoader.java | 2 +- .../arrow/vector/complex/StructVector.java | 4 ++-- .../complex/impl/ComplexWriterImpl.java | 2 +- .../impl/NullableStructWriterFactory.java | 2 +- .../complex/impl/StructOrListWriterImpl.java | 11 ++++++++++ .../arrow/vector/util/DecimalUtility.java | 4 ++-- .../arrow/vector/util/VectorAppender.java | 2 +- .../validate/ValidateVectorTypeVisitor.java | 2 +- .../arrow/vector/TestDecimal256Vector.java | 2 +- .../arrow/vector/TestDenseUnionVector.java | 6 +++--- .../arrow/vector/TestVectorReAlloc.java | 8 ++++---- .../vector/ipc/MessageSerializerTest.java | 4 ++-- .../vector/types/pojo/TestExtensionType.java | 2 +- 53 files changed, 112 insertions(+), 97 deletions(-) diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrow.java b/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrow.java index 9fb5ce291fde8..33f180393780e 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrow.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrow.java @@ -39,9 +39,9 @@ public class AvroToArrow { */ static VectorSchemaRoot avroToArrow(Schema schema, Decoder decoder, AvroToArrowConfig config) throws IOException { - Preconditions.checkNotNull(schema, "Avro schema object can not be null"); - Preconditions.checkNotNull(decoder, "Avro decoder object 
can not be null"); - Preconditions.checkNotNull(config, "config can not be null"); + Preconditions.checkNotNull(schema, "Avro schema object cannot be null"); + Preconditions.checkNotNull(decoder, "Avro decoder object cannot be null"); + Preconditions.checkNotNull(config, "config cannot be null"); return AvroToArrowUtils.avroToArrowVectors(schema, decoder, config); } @@ -58,9 +58,9 @@ public static AvroToArrowVectorIterator avroToArrowIterator( Decoder decoder, AvroToArrowConfig config) throws IOException { - Preconditions.checkNotNull(schema, "Avro schema object can not be null"); - Preconditions.checkNotNull(decoder, "Avro decoder object can not be null"); - Preconditions.checkNotNull(config, "config can not be null"); + Preconditions.checkNotNull(schema, "Avro schema object cannot be null"); + Preconditions.checkNotNull(decoder, "Avro decoder object cannot be null"); + Preconditions.checkNotNull(config, "config cannot be null"); return AvroToArrowVectorIterator.create(decoder, schema, config); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java index 6e789009dd20a..632c7c474b4a9 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java @@ -53,7 +53,7 @@ public class ArrowVectorIterator implements Iterator, AutoClos private final int targetBatchSize; - // This is used to track whether the ResultSet has been fully read, and is needed spcifically for cases where there + // This is used to track whether the ResultSet has been fully read, and is needed specifically for cases where there // is a ResultSet having zero rows (empty): private boolean readComplete = false; diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java index 97ca8f27ceb49..d16964ea14417 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java @@ -129,7 +129,7 @@ public JdbcFieldInfo(ResultSetMetaData rsmd, int column) throws SQLException { /** * Builds a JdbcFieldInfo from the corresponding row from a {@link java.sql.DatabaseMetaData#getColumns} - * ResulSet. + * ResultSet. * * @param rs The {@link java.sql.ResultSet} to get the field information from. * @throws SQLException If the column information cannot be retrieved. 
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java index daee64d93080a..246451b5b22f9 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java @@ -77,7 +77,7 @@ public static ArrowVectorIterator sqlToArrowVectorIterator( ResultSet resultSet, BufferAllocator allocator) throws SQLException, IOException { - Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null"); + Preconditions.checkNotNull(allocator, "Memory Allocator object cannot be null"); JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar()); @@ -96,7 +96,7 @@ public static ArrowVectorIterator sqlToArrowVectorIterator( ResultSet resultSet, JdbcToArrowConfig config) throws SQLException, IOException { - Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); + Preconditions.checkNotNull(resultSet, "JDBC ResultSet object cannot be null"); Preconditions.checkNotNull(config, "The configuration cannot be null"); return ArrowVectorIterator.create(resultSet, config); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java index e23bad54afc14..68851f4a98bc9 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java @@ -121,7 +121,7 @@ public final class JdbcToArrowConfig { * @param reuseVectorSchemaRoot Whether to reuse the vector schema root for each data load. * @param arraySubTypesByColumnIndex The type of the JDBC array at the column index (1-based). * @param arraySubTypesByColumnName The type of the JDBC array at the column name. - * @param targetBatchSize The target batch size to be used in preallcation of the resulting vectors. + * @param targetBatchSize The target batch size to be used in preallocation of the resulting vectors. * @param jdbcToArrowTypeConverter The function that maps JDBC field type information to arrow type. If set to null, * the default mapping will be used, which is defined as: *
    diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java index dc36ef9f8275b..88a66a31aa2c9 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java @@ -183,7 +183,7 @@ public static Object[][] prepareTestData(String[] testFiles, @SuppressWarnings(" */ public VectorSchemaRoot sqlToArrow(Connection connection, String query, BufferAllocator allocator) throws SQLException, IOException { - Preconditions.checkNotNull(allocator, "Memory allocator object can not be null"); + Preconditions.checkNotNull(allocator, "Memory allocator object cannot be null"); JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar()) .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) @@ -212,8 +212,8 @@ public VectorSchemaRoot sqlToArrow( BufferAllocator allocator, Calendar calendar) throws SQLException, IOException { - Preconditions.checkNotNull(allocator, "Memory allocator object can not be null"); - Preconditions.checkNotNull(calendar, "Calendar object can not be null"); + Preconditions.checkNotNull(allocator, "Memory allocator object cannot be null"); + Preconditions.checkNotNull(calendar, "Calendar object cannot be null"); JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, calendar) .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) @@ -237,8 +237,8 @@ public VectorSchemaRoot sqlToArrow( */ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, JdbcToArrowConfig config) throws SQLException, IOException { - Preconditions.checkNotNull(connection, "JDBC connection object can not be null"); - Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty"); + Preconditions.checkNotNull(connection, "JDBC connection object cannot be null"); + Preconditions.checkArgument(query != null && query.length() > 0, "SQL query cannot be null or empty"); try (Statement stmt = connection.createStatement()) { return sqlToArrow(stmt.executeQuery(query), config); @@ -256,7 +256,7 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, J * @throws SQLException on error */ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet) throws SQLException, IOException { - Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); + Preconditions.checkNotNull(resultSet, "JDBC ResultSet object cannot be null"); return sqlToArrow(resultSet, JdbcToArrowUtils.getUtcCalendar()); } @@ -273,7 +273,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet) throws SQLExcepti */ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BufferAllocator allocator) throws SQLException, IOException { - Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null"); + Preconditions.checkNotNull(allocator, "Memory Allocator object cannot be null"); JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar()) .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) @@ -292,7 +292,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BufferAllocator a * @throws SQLException on error */ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws 
SQLException, IOException { - Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); + Preconditions.checkNotNull(resultSet, "JDBC ResultSet object cannot be null"); JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), calendar) .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) @@ -316,7 +316,7 @@ public static VectorSchemaRoot sqlToArrow( BufferAllocator allocator, Calendar calendar) throws SQLException, IOException { - Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null"); + Preconditions.checkNotNull(allocator, "Memory Allocator object cannot be null"); JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, calendar) .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) @@ -336,7 +336,7 @@ public static VectorSchemaRoot sqlToArrow( */ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig config) throws SQLException, IOException { - Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); + Preconditions.checkNotNull(resultSet, "JDBC ResultSet object cannot be null"); Preconditions.checkNotNull(config, "The configuration cannot be null"); VectorSchemaRoot root = VectorSchemaRoot.create( diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java index 90554578d1f45..3eb886faabc10 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java @@ -151,7 +151,7 @@ public void testInconsistentPrecisionAndScale() throws Exception { assertThrows(RuntimeException.class, iter::next, "This is expected to fail due to inconsistent BigDecimal scales, while strict matching is enabled."); } - // Reuse same ResultSet, with RoundingMode.UNNECESSARY set to coerce BigDecmial scale as needed: + // Reuse same ResultSet, with RoundingMode.UNNECESSARY set to coerce BigDecimal scale as needed: config = new JdbcToArrowConfigBuilder( allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java index a6e6b22fcb45d..d9acfe88f4f8b 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java @@ -63,7 +63,7 @@ public void setUp() throws Exception { /** * Test h2 database query with alias for column name and column label. - * To vetify reading field alias from an H2 database works as expected. + * To verify reading field alias from an H2 database works as expected. * If this test fails, something is either wrong with the setup, * or the H2 SQL behavior changed. 
*/ diff --git a/java/c/src/main/java/org/apache/arrow/c/Data.java b/java/c/src/main/java/org/apache/arrow/c/Data.java index 6cb0c0ac40aca..a92853b3504f0 100644 --- a/java/c/src/main/java/org/apache/arrow/c/Data.java +++ b/java/c/src/main/java/org/apache/arrow/c/Data.java @@ -222,7 +222,7 @@ public static void exportVectorSchemaRoot(BufferAllocator allocator, VectorSchem /** * Export a reader as an ArrowArrayStream using the C Stream Interface. - * @param allocator Buffer allocator for allocating C data inteface fields + * @param allocator Buffer allocator for allocating C data interface fields * @param reader Reader to export * @param out C struct to export the stream */ diff --git a/java/c/src/main/java/org/apache/arrow/c/NativeUtil.java b/java/c/src/main/java/org/apache/arrow/c/NativeUtil.java index b152ea4e7c9fd..ba65fd80c4141 100644 --- a/java/c/src/main/java/org/apache/arrow/c/NativeUtil.java +++ b/java/c/src/main/java/org/apache/arrow/c/NativeUtil.java @@ -115,7 +115,7 @@ public static void closeBuffer(ArrowBuf buf) { * Get the address of a buffer or {@value #NULL} if the input buffer is null. * * @param buf Buffer to get the address of - * @return Memory addresss or {@value #NULL} + * @return Memory address or {@value #NULL} */ public static long addressOrNull(ArrowBuf buf) { if (buf == null) { @@ -129,7 +129,7 @@ public static long addressOrNull(ArrowBuf buf) { * struct is null. * * @param struct C Data Interface struct to get the address of - * @return Memory addresss or {@value #NULL} + * @return Memory address or {@value #NULL} */ public static long addressOrNull(BaseStruct struct) { if (struct == null) { diff --git a/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java b/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java index 10e35701776ee..4a62be7851ac7 100644 --- a/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java +++ b/java/c/src/main/java/org/apache/arrow/vector/StructVectorLoader.java @@ -101,7 +101,7 @@ public StructVector load(BufferAllocator allocator, ArrowRecordBatch recordBatch private void loadBuffers(FieldVector vector, Field field, Iterator buffers, Iterator nodes, CompressionCodec codec) { - checkArgument(nodes.hasNext(), "no more field nodes for for field %s and vector %s", field, vector); + checkArgument(nodes.hasNext(), "no more field nodes for field %s and vector %s", field, vector); ArrowFieldNode fieldNode = nodes.next(); int bufferLayoutCount = TypeLayout.getTypeBufferCount(field.getType()); List ownBuffers = new ArrayList<>(bufferLayoutCount); diff --git a/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java b/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java index 9dcb262af4616..d892781756ede 100644 --- a/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java +++ b/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java @@ -128,7 +128,7 @@ public void testRoundtripMultipleBatches() throws IOException { ArrowSchema consumerArrowSchema = ArrowSchema.allocateNew(allocator)) { // Load first batch reader.loadNextBatch(); - // Producer fills consumer schema stucture + // Producer fills consumer schema structure Data.exportSchema(allocator, reader.getVectorSchemaRoot().getSchema(), reader, consumerArrowSchema); // Consumer loads it as an empty vector schema root try (CDataDictionaryProvider consumerDictionaryProvider = new CDataDictionaryProvider(); diff --git a/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java 
b/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java index 7db00cfde485d..403130edba52e 100644 --- a/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java +++ b/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java @@ -317,7 +317,7 @@ void withRoot(CompressionUtil.CodecType codec, BiConsumerFindClass(class_name.c_str()); - DCHECK_NE(jclass, nullptr) << "Could not find Java class " << class_name; - return env->IsInstanceOf(t, jclass); + jclass java_class = env->FindClass(class_name.c_str()); + DCHECK_NE(java_class, nullptr) << "Could not find Java class " << class_name; + return env->IsInstanceOf(t, java_class); } arrow::StatusCode MapJavaError(JNIEnv* env, jthrowable t) { diff --git a/java/dataset/src/main/cpp/jni_wrapper.cc b/java/dataset/src/main/cpp/jni_wrapper.cc index 49e0f1720909f..d2d976677bd6b 100644 --- a/java/dataset/src/main/cpp/jni_wrapper.cc +++ b/java/dataset/src/main/cpp/jni_wrapper.cc @@ -282,7 +282,7 @@ std::unordered_map> LoadNamedTables(J std::unordered_map> map_table_to_record_batch_reader; int length = env->GetArrayLength(str_array); if (length % 2 != 0) { - JniThrow("Can not map odd number of array elements to key/value pairs"); + JniThrow("Cannot map odd number of array elements to key/value pairs"); } std::shared_ptr output_table; for (int pos = 0; pos < length; pos++) { @@ -399,7 +399,7 @@ JNIEXPORT jlong JNICALL Java_org_apache_arrow_dataset_jni_NativeMemoryPool_bytes JNI_METHOD_START arrow::MemoryPool* pool = reinterpret_cast(memory_pool_id); if (pool == nullptr) { - JniThrow("Memory pool instance not found. It may not exist nor has been closed"); + JniThrow("Memory pool instance not found. It may not exist or have been closed"); } return pool->bytes_allocated(); JNI_METHOD_END(-1L) diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java index a7df5be42f13b..637a3e8f22a9a 100644 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java +++ b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java @@ -116,7 +116,7 @@ public native long createScanner(long datasetId, String[] columns, ByteBuffer su public native void releaseBuffer(long bufferId); /** - * Ensure the S3 APIs are shutdown, but only if not already done. If the S3 APIs are unintialized, + * Ensure the S3 APIs are shutdown, but only if not already done. If the S3 APIs are uninitialized, * then this is a noop. */ public native void ensureS3Finalized(); diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/substrait/JniWrapper.java b/java/dataset/src/main/java/org/apache/arrow/dataset/substrait/JniWrapper.java index 236d1d5616061..5cb68f8514678 100644 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/substrait/JniWrapper.java +++ b/java/dataset/src/main/java/org/apache/arrow/dataset/substrait/JniWrapper.java @@ -25,7 +25,7 @@ * Class that contains Native methods to call Acero C++ Substrait API. It internally depends on C++ function * arrow::engine::ExecuteSerializedPlan. Currently supported input parameters supported are: *
    - * - arrow::Buffer: Susbtrait Plan (JSON or Binary format).
    + * - arrow::Buffer: Substrait Plan (JSON or Binary format).
      * - arrow::engine::ConversionOptions: Mapping for arrow::engine::NamedTableProvider.
      * 
    */ diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java index 38a44d0e5913f..e80fb41c67273 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/OutboundStreamListener.java @@ -106,7 +106,7 @@ default void start(VectorSchemaRoot root, DictionaryProvider dictionaries) { void completed(); /** - * Toggle whether to ues the zero-copy write optimization. + * Toggle whether to use the zero-copy write optimization. * *

    By default or when disabled, Arrow may copy data into a buffer for the underlying implementation to * send. When enabled, Arrow will instead try to directly enqueue the Arrow buffer for sending. Not all diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHandshakeWrapper.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHandshakeWrapper.java index 16a5142509d4d..c84739d2e345c 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHandshakeWrapper.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/auth2/ClientHandshakeWrapper.java @@ -61,7 +61,7 @@ public static void doClientHandshake(FlightServiceStub stub) { throw wrappedException; } } catch (StatusRuntimeException sre) { - logger.error("Failed with SREe", sre); + logger.error("Failed with SRE", sre); throw StatusUtils.fromGrpcRuntimeException(sre); } catch (Throwable ex) { logger.error("Failed with unknown", ex); diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java index 3487e58a64678..d68b8070e2bb7 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/ArrowDatabaseMetadata.java @@ -1048,8 +1048,8 @@ private int setGetColumnsVectorSchemaRootFromFields(final VectorSchemaRoot curre SqlTypes.getSqlTypeNameFromArrowType(fieldType).getBytes(CHARSET); typeNameVector.setSafe(insertIndex, typeName); - // We're not setting COLUMN_SIZE for ROWID SQL Types, as there's no such Arrow type. - // We're not setting COLUMN_SIZE nor DECIMAL_DIGITS for Float/Double as their precision and scale are variable. + // We aren't setting COLUMN_SIZE for ROWID SQL Types, as there's no such Arrow type. + // We aren't setting COLUMN_SIZE nor DECIMAL_DIGITS for Float/Double as their precision and scale are variable. if (fieldType instanceof ArrowType.Decimal) { numPrecRadixVector.setSafe(insertIndex, BASE10_RADIX); } else if (fieldType instanceof ArrowType.Int) { @@ -1101,7 +1101,7 @@ private static byte[] booleanToYesOrNo(boolean autoIncrement) { } static Integer getDecimalDigits(final ArrowType fieldType) { - // We're not setting DECIMAL_DIGITS for Float/Double as their precision and scale are variable. + // We aren't setting DECIMAL_DIGITS for Float/Double as their precision and scale are variable. if (fieldType instanceof ArrowType.Decimal) { final ArrowType.Decimal thisDecimal = (ArrowType.Decimal) fieldType; return thisDecimal.getScale(); @@ -1141,8 +1141,8 @@ static Integer getDecimalDigits(final ArrowType fieldType) { } static Integer getColumnSize(final ArrowType fieldType) { - // We're not setting COLUMN_SIZE for ROWID SQL Types, as there's no such Arrow type. - // We're not setting COLUMN_SIZE nor DECIMAL_DIGITS for Float/Double as their precision and scale are variable. + // We aren't setting COLUMN_SIZE for ROWID SQL Types, as there's no such Arrow type. + // We aren't setting COLUMN_SIZE nor DECIMAL_DIGITS for Float/Double as their precision and scale are variable. 
if (fieldType instanceof ArrowType.Decimal) { final ArrowType.Decimal thisDecimal = (ArrowType.Decimal) fieldType; return thisDecimal.getPrecision(); diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java index 54fd17853c00b..234820bd41823 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/ArrowFlightSqlClientHandler.java @@ -469,8 +469,8 @@ public static final class Builder { @VisibleForTesting boolean retainAuth = true; - // These two middlewares are for internal use within build() and should not be exposed by builder APIs. - // Note that these middlewares may not necessarily be registered. + // These two middleware are for internal use within build() and should not be exposed by builder APIs. + // Note that these middleware may not necessarily be registered. @VisibleForTesting ClientIncomingAuthHeaderMiddleware.Factory authFactory = new ClientIncomingAuthHeaderMiddleware.Factory(new ClientBearerHeaderHandler()); @@ -742,7 +742,7 @@ public Builder withCallOptions(final Collection options) { * @throws SQLException on error. */ public ArrowFlightSqlClientHandler build() throws SQLException { - // Copy middlewares so that the build method doesn't change the state of the builder fields itself. + // Copy middleware so that the build method doesn't change the state of the builder fields itself. Set buildTimeMiddlewareFactories = new HashSet<>(this.middlewareFactories); FlightClient client = null; boolean isUsingUserPasswordAuth = username != null && token == null; diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java index 5fa3ba38f2506..b2bd8e745ecca 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java @@ -94,7 +94,7 @@ public void bind(List typedValues, int index) { } /** - * Bind a TypedValue to the given index on the FieldVctor. + * Bind a TypedValue to the given index on the FieldVector. * * @param vector FieldVector to bind to. * @param typedValue TypedValue to bind to the vector. 
diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformer.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformer.java index 3bab918c83aab..52a1d7db791c2 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformer.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/VectorSchemaRootTransformer.java @@ -74,7 +74,7 @@ public Builder renameFieldVector(final String originalVectorName, final ArrowType transformedType = transformedVector.getField().getType(); if (!originalType.equals(transformedType)) { throw new IllegalArgumentException(String.format( - "Can not transfer vector with field type %s to %s", originalType, transformedType)); + "Cannot transfer vector with field type %s to %s", originalType, transformedType)); } if (originalVector instanceof BaseVariableWidthVector) { @@ -85,7 +85,7 @@ public Builder renameFieldVector(final String originalVectorName, ((BaseFixedWidthVector) transformedVector)); } else { throw new IllegalStateException(String.format( - "Can not transfer vector of type %s", originalVector.getClass())); + "Cannot transfer vector of type %s", originalVector.getClass())); } }); diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriverTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriverTest.java index 9b8fa96d2320e..784fd5b292b27 100644 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriverTest.java +++ b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ArrowFlightJdbcDriverTest.java @@ -142,7 +142,7 @@ public void testConnectWithInsensitiveCasePropertyKeys() throws Exception { driver.connect("jdbc:arrow-flight://" + dataSource.getConfig().getHost() + ":" + dataSource.getConfig().getPort() + "?" + - "UseEncryptiOn=false", + "UseEncryptIon=false", dataSource.getProperties(dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()))) { assertTrue(connection.isValid(300)); } @@ -150,7 +150,7 @@ public void testConnectWithInsensitiveCasePropertyKeys() throws Exception { driver.connect("jdbc:arrow-flight-sql://" + dataSource.getConfig().getHost() + ":" + dataSource.getConfig().getPort() + "?" 
+ - "UseEncryptiOn=false", + "UseEncryptIon=false", dataSource.getProperties(dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()))) { assertTrue(connection.isValid(300)); } @@ -162,7 +162,7 @@ public void testConnectWithInsensitiveCasePropertyKeys2() throws Exception { final Driver driver = new ArrowFlightJdbcDriver(); Properties properties = dataSource.getProperties(dataSource.getConfig().getUser(), dataSource.getConfig().getPassword()); - properties.put("UseEncryptiOn", "false"); + properties.put("UseEncryptIon", "false"); try (Connection connection = driver.connect("jdbc:arrow-flight://" + diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/MockFlightSqlProducer.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/MockFlightSqlProducer.java index f36956f193ce8..c165bfb7ce336 100644 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/MockFlightSqlProducer.java +++ b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/utils/MockFlightSqlProducer.java @@ -195,7 +195,7 @@ void addUpdateQuery(final String sqlCommand, final BiConsumer> resultsProvider) { Preconditions.checkState( updateResultProviders.putIfAbsent(sqlCommand, resultsProvider) == null, - format("Attempted to overwrite pre-existing query: <%s>.", sqlCommand)); + format("Attempted to overwrite preexisting query: <%s>.", sqlCommand)); } /** Registers parameters expected to be provided with a prepared statement. */ diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlColumnMetadata.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlColumnMetadata.java index bd52e4b495e6e..186e8bc04ec9c 100644 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlColumnMetadata.java +++ b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlColumnMetadata.java @@ -145,8 +145,8 @@ public Boolean isAutoIncrement() { } /** - * Returns if the column is case sensitive. - * @return True if the column is case sensitive, false otherwise. + * Returns if the column is case-sensitive. + * @return True if the column is case-sensitive, false otherwise. */ public Boolean isCaseSensitive() { String value = metadataMap.get(IS_CASE_SENSITIVE); @@ -267,8 +267,8 @@ public Builder isAutoIncrement(boolean isAutoIncrement) { } /** - * Sets if the column is case sensitive. - * @param isCaseSensitive If the column is case sensitive. + * Sets if the column is case-sensitive. + * @param isCaseSensitive If the column is case-sensitive. * @return This builder. */ public Builder isCaseSensitive(boolean isCaseSensitive) { diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/util/TableRef.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/util/TableRef.java index 315f17ee911cf..b3751cab9038a 100644 --- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/util/TableRef.java +++ b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/util/TableRef.java @@ -66,7 +66,7 @@ public String getDbSchema() { } /** - * Retreives the table from the object. + * Retrieves the table from the object. * @return the table. 
*/ public String getTable() { diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java index 5158d52f8c998..2528989f3784b 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java @@ -158,10 +158,10 @@ static long getDefaultConfiguration() throws GandivaException { synchronized (ConfigurationBuilder.class) { if (defaultConfiguration == 0L) { JniLoader.getInstance(); // setup - ConfigurationBuilder.ConfigOptions defaultConfigOptons = ConfigurationBuilder.ConfigOptions.getDefault(); + ConfigurationBuilder.ConfigOptions defaultConfigOptions = ConfigurationBuilder.ConfigOptions.getDefault(); defaultConfiguration = new ConfigurationBuilder() - .buildConfigInstance(defaultConfigOptons); - configurationMap.put(defaultConfigOptons, defaultConfiguration); + .buildConfigInstance(defaultConfigOptions); + configurationMap.put(defaultConfigOptions, defaultConfiguration); } } } diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java index 0f8de962869d8..bb1391b4001ea 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/expression/InNode.java @@ -64,7 +64,7 @@ private InNode(Set values, Set longValues, Set stringValu * * @param node Node with the 'IN' clause. * @param intValues Int values to build the IN node. - * @retur InNode referring to tree node. + * @return InNode referring to tree node. */ public static InNode makeIntInExpr(TreeNode node, Set intValues) { return new InNode(intValues, @@ -77,7 +77,7 @@ public static InNode makeIntInExpr(TreeNode node, Set intValues) { * * @param node Node with the 'IN' clause. * @param longValues Long values to build the IN node. - * @retur InNode referring to tree node. + * @return InNode referring to tree node. */ public static InNode makeLongInExpr(TreeNode node, Set longValues) { return new InNode(null, longValues, @@ -90,7 +90,7 @@ public static InNode makeLongInExpr(TreeNode node, Set longValues) { * * @param node Node with the 'IN' clause. * @param floatValues Float values to build the IN node. - * @retur InNode referring to tree node. + * @return InNode referring to tree node. */ public static InNode makeFloatInExpr(TreeNode node, Set floatValues) { return new InNode(null, null, null, null, null, null, @@ -102,7 +102,7 @@ public static InNode makeFloatInExpr(TreeNode node, Set floatValues) { * * @param node Node with the 'IN' clause. * @param doubleValues Double values to build the IN node. - * @retur InNode referring to tree node. + * @return InNode referring to tree node. 
*/ public static InNode makeDoubleInExpr(TreeNode node, Set doubleValues) { return new InNode(null, null, null, null, null, diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java index 4331eb20ca3b6..c672dc48d79ca 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java @@ -59,7 +59,7 @@ public interface AllocationReservation extends AutoCloseable { * requested is available, if the allocation cannot be made contiguously.

    * * @return the buffer, or null, if the request cannot be satisfied - * @throws IllegalStateException if called called more than once + * @throws IllegalStateException if called more than once */ ArrowBuf allocateBuffer(); diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ArrowBuf.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ArrowBuf.java index 5b322b4ff566b..2c2e93b2d70ce 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ArrowBuf.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ArrowBuf.java @@ -1210,7 +1210,7 @@ public ArrowBuf setOne(long index, long length) { } /** - * Returns this if size is less then {@link #capacity()}, otherwise + * Returns this if size is less than {@link #capacity()}, otherwise * delegates to {@link BufferManager#replace(ArrowBuf, long)} to get a new buffer. */ public ArrowBuf reallocIfNeeded(final long size) { diff --git a/java/vector/src/main/codegen/templates/BaseWriter.java b/java/vector/src/main/codegen/templates/BaseWriter.java index 3b35d22692e68..35df256b324b5 100644 --- a/java/vector/src/main/codegen/templates/BaseWriter.java +++ b/java/vector/src/main/codegen/templates/BaseWriter.java @@ -117,7 +117,11 @@ public interface StructOrListWriter { void start(); void end(); StructOrListWriter struct(String name); + /** + * @deprecated use {@link #listOfStruct()} instead. + */ StructOrListWriter listoftstruct(String name); + StructOrListWriter listOfStruct(String name); StructOrListWriter list(String name); boolean isStructWriter(); boolean isListWriter(); diff --git a/java/vector/src/main/codegen/templates/DenseUnionVector.java b/java/vector/src/main/codegen/templates/DenseUnionVector.java index de0cf84fd82ad..c23caf3bb5a03 100644 --- a/java/vector/src/main/codegen/templates/DenseUnionVector.java +++ b/java/vector/src/main/codegen/templates/DenseUnionVector.java @@ -662,7 +662,7 @@ public void splitAndTransfer(int startIndex, int length) { ReferenceManager refManager = slicedBuffer.getReferenceManager(); to.typeBuffer = refManager.transferOwnership(slicedBuffer, to.allocator).getTransferredBuffer(); - // transfer offset byffer + // transfer offset buffer while (to.offsetBuffer.capacity() < (long) length * OFFSET_WIDTH) { to.reallocOffsetBuffer(); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java index d09664e6d313e..90229460111c3 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java @@ -478,7 +478,7 @@ public List getFieldInnerVectors() { @Override public void initializeChildrenFromFields(List children) { if (!children.isEmpty()) { - throw new IllegalArgumentException("primitive type vector can not have children"); + throw new IllegalArgumentException("primitive type vector cannot have children"); } } @@ -608,7 +608,7 @@ public TransferPair getTransferPair(BufferAllocator allocator) { public abstract TransferPair getTransferPair(Field field, BufferAllocator allocator); /** - * Transfer this vector'data to another vector. The memory associated + * Transfer this vector's data to another vector. The memory associated * with this vector is transferred to the allocator of target vector * for accounting and management purposes. 
* @param target destination vector for transfer diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java index fcac28bd08470..a77278138f28c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java @@ -276,7 +276,7 @@ public List getFieldInnerVectors() { @Override public void initializeChildrenFromFields(List children) { if (!children.isEmpty()) { - throw new IllegalArgumentException("primitive type vector can not have children"); + throw new IllegalArgumentException("primitive type vector cannot have children"); } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java index a0a5e085a5a8a..46bc9815f037a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java @@ -295,7 +295,7 @@ public List getFieldInnerVectors() { @Override public void initializeChildrenFromFields(List children) { if (!children.isEmpty()) { - throw new IllegalArgumentException("primitive type vector can not have children"); + throw new IllegalArgumentException("primitive type vector cannot have children"); } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java b/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java index 79a9badc3955d..fe650c7d28074 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java @@ -212,7 +212,7 @@ public void set(int index, ArrowBuf buffer) { * ArrowBuf of decimal vector. * *

    This method takes care of adding the necessary padding if the length - * of byte array is less then 32 (length of decimal type). + * of byte array is less than 32 (length of decimal type). * * @param index position of element * @param value array of bytes containing decimal in big endian byte order. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java index d1a3bfc3afb10..7c3662c86748b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java @@ -159,7 +159,7 @@ public BigDecimal getObject(int index) { } /** - * Same as {@link #getObect(int)} but does not check for null. + * Same as {@link #getObject(int)} but does not check for null. * * @param index position of element * @return element at given index @@ -211,7 +211,7 @@ public void set(int index, ArrowBuf buffer) { * ArrowBuf of decimal vector. * *

    This method takes care of adding the necessary padding if the length - * of byte array is less then 16 (length of decimal type). + * of byte array is less than 16 (length of decimal type). * * @param index position of element * @param value array of bytes containing decimal in big endian byte order. diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java index a70efe61bcdfe..3a35a44403492 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java @@ -50,7 +50,7 @@ public abstract class ExtensionTypeVector e */ public ExtensionTypeVector(String name, BufferAllocator allocator, T underlyingVector) { super(allocator); - Preconditions.checkNotNull(underlyingVector, "underlyingVector can not be null."); + Preconditions.checkNotNull(underlyingVector, "underlyingVector cannot be null."); this.name = name; this.underlyingVector = underlyingVector; } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/IntervalMonthDayNanoVector.java b/java/vector/src/main/java/org/apache/arrow/vector/IntervalMonthDayNanoVector.java index 73bbc0a2c19f2..fc0aa9d27b1c3 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/IntervalMonthDayNanoVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/IntervalMonthDayNanoVector.java @@ -39,7 +39,7 @@ * A validity buffer (bit vector) is maintained to track which elements in the * vector are null. * - * Month, day and nanoseconds are indepndent from one another and there + * Month, day and nanoseconds are independent from one another and there * is no specific limits imposed on their values. */ public final class IntervalMonthDayNanoVector extends BaseFixedWidthVector { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index ed5f3aef17397..510cef24c7e16 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -96,7 +96,7 @@ private void loadBuffers( Iterator buffers, Iterator nodes, CompressionCodec codec) { - checkArgument(nodes.hasNext(), "no more field nodes for for field %s and vector %s", field, vector); + checkArgument(nodes.hasNext(), "no more field nodes for field %s and vector %s", field, vector); ArrowFieldNode fieldNode = nodes.next(); int bufferLayoutCount = TypeLayout.getTypeBufferCount(field.getType()); List ownBuffers = new ArrayList<>(bufferLayoutCount); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java index d0304a6fd2504..27db1574808a3 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java @@ -100,7 +100,7 @@ public StructVector(String name, * @param fieldType The type of this list. * @param callBack A schema change callback. * @param conflictPolicy policy to determine how duplicate names are handled. - * @param allowConflictPolicyChanges wether duplicate names are allowed at all. + * @param allowConflictPolicyChanges whether duplicate names are allowed at all. 
*/ public StructVector(String name, BufferAllocator allocator, @@ -139,7 +139,7 @@ public StructVector(Field field, * @param allocator The allocator to use to allocating/reallocating buffers. * @param callBack A schema change callback. * @param conflictPolicy policy to determine how duplicate names are handled. - * @param allowConflictPolicyChanges wether duplicate names are allowed at all. + * @param allowConflictPolicyChanges whether duplicate names are allowed at all. */ public StructVector(Field field, BufferAllocator allocator, diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java index 8d2694b6df887..8dd5763990fa8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java @@ -49,7 +49,7 @@ private enum Mode { INIT, STRUCT, LIST, MAP } * @param name The name of the writer (for tracking). * @param container A container for the data field to be written. * @param unionEnabled Unused. - * @param caseSensitive Whether field names are case sensitive (if false field names will be lowercase. + * @param caseSensitive Whether field names are case-sensitive (if false field names will be lowercase. */ public ComplexWriterImpl( String name, diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructWriterFactory.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructWriterFactory.java index 458aa7b610147..a305529b71fa8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructWriterFactory.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructWriterFactory.java @@ -21,7 +21,7 @@ /** * A factory for {@link NullableStructWriter} instances. The factory allows for configuring if field - * names should be considered case sensitive. + * names should be considered case-sensitive. */ public class NullableStructWriterFactory { private final boolean caseSensitive; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/StructOrListWriterImpl.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/StructOrListWriterImpl.java index e9c0825dd3d49..5c4cd2af98d55 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/StructOrListWriterImpl.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/StructOrListWriterImpl.java @@ -87,8 +87,19 @@ public StructOrListWriter struct(final String name) { * Creates a new writer for a list of structs. * * @param name Unused. + * + * @deprecated use {@link #listOfStruct()} instead. */ public StructOrListWriter listoftstruct(final String name) { + return listOfStruct(name); + } + + /** + * Creates a new writer for a list of structs. + * + * @param name Unused. 
+ */ + public StructOrListWriter listOfStruct(final String name) { assert list != null; return new StructOrListWriterImpl(list.struct()); } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java b/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java index a81169b8f7d73..0dfb61dcdf269 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java @@ -94,7 +94,7 @@ public static boolean checkPrecisionAndScale(BigDecimal value, int vectorPrecisi value.scale() + " != " + vectorScale); } if (value.precision() > vectorPrecision) { - throw new UnsupportedOperationException("BigDecimal precision can not be greater than that in the Arrow " + + throw new UnsupportedOperationException("BigDecimal precision cannot be greater than that in the Arrow " + "vector: " + value.precision() + " > " + vectorPrecision); } return true; @@ -120,7 +120,7 @@ public static boolean checkPrecisionAndScale(int decimalPrecision, int decimalSc decimalScale + " != " + vectorScale); } if (decimalPrecision > vectorPrecision) { - throw new UnsupportedOperationException("BigDecimal precision can not be greater than that in the Arrow " + + throw new UnsupportedOperationException("BigDecimal precision cannot be greater than that in the Arrow " + "vector: " + decimalPrecision + " > " + vectorPrecision); } return true; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java index c5de380f9c173..068717c7acbc7 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java @@ -506,7 +506,7 @@ public ValueVector visit(DenseUnionVector deltaVector, Void value) { targetChildVector = targetDenseUnionVector.addVector( (byte) i, deltaChildVector.getField().createVector(targetDenseUnionVector.getAllocator())); - // now we have both child vecors not null, we can append them. + // now we have both child vectors not null, we can append them. VectorAppender childAppender = new VectorAppender(targetChildVector); deltaChildVector.accept(childAppender, null); } else if (targetChildVector != null && deltaChildVector == null) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java index 08e3ccccfa29a..3d1c5a4f27f7c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java @@ -280,7 +280,7 @@ public Void visit(BaseFixedWidthVector vector, Void value) { validateOrThrow(arrowType.getByteWidth() > 0, "The byte width of a FixedSizeBinaryVector %s is not positive.", arrowType.getByteWidth()); validateOrThrow(arrowType.getByteWidth() == vector.getTypeWidth(), - "Type width mismatch for FixedSizeBinaryVector. Vector type width %s, arrow type type width %s.", + "Type width mismatch for FixedSizeBinaryVector. 
Vector type width %s, arrow type width %s.", vector.getTypeWidth(), arrowType.getByteWidth()); } else { throw new IllegalArgumentException("Unknown type for fixed width vector " + vector.getClass()); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java index 51368cf6aea35..b703959d2bb1e 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java @@ -104,7 +104,7 @@ public void testDecimal256DifferentScaleAndPrecision() { BigDecimal decimal = new BigDecimal(BigInteger.valueOf(12345), 2); UnsupportedOperationException ue = assertThrows(UnsupportedOperationException.class, () -> decimalVector.setSafe(0, decimal)); - assertEquals("BigDecimal precision can not be greater than that in the Arrow vector: 5 > 4", ue.getMessage()); + assertEquals("BigDecimal precision cannot be greater than that in the Arrow vector: 5 > 4", ue.getMessage()); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java index 01becf00794ee..9cb12481612b2 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java @@ -610,9 +610,9 @@ public void testChildVectorValueCounts() { assertEquals(8L, longVector.get(0)); assertEquals(12L, longVector.get(1)); - Float4Vector floagVector = (Float4Vector) vector.getVectorByType(floatTypeId); - assertEquals(1, floagVector.getValueCount()); - assertEquals(9.0f, floagVector.get(0), 0); + Float4Vector floatVector = (Float4Vector) vector.getVectorByType(floatTypeId); + assertEquals(1, floatVector.getValueCount()); + assertEquals(9.0f, floatVector.get(0), 0); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java index 18bb2c95738a4..7d5701ddb765b 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java @@ -255,7 +255,7 @@ public void testVarCharAllocateNew() throws Exception { try (final VarCharVector vector = new VarCharVector("", allocator)) { vector.allocateNew(count); - // verify that the validity buffer and value buffer have capacity for atleast 'count' elements. + // verify that the validity buffer and value buffer have capacity for at least 'count' elements. Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH); } @@ -268,7 +268,7 @@ public void testLargeVarCharAllocateNew() throws Exception { try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) { vector.allocateNew(count); - // verify that the validity buffer and value buffer have capacity for atleast 'count' elements. + // verify that the validity buffer and value buffer have capacity for at least 'count' elements. 
Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH); } @@ -281,7 +281,7 @@ public void testVarCharAllocateNewUsingHelper() throws Exception { try (final VarCharVector vector = new VarCharVector("", allocator)) { AllocationHelper.allocateNew(vector, count); - // verify that the validity buffer and value buffer have capacity for atleast 'count' elements. + // verify that the validity buffer and value buffer have capacity for at least 'count' elements. Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH); } @@ -294,7 +294,7 @@ public void testLargeVarCharAllocateNewUsingHelper() throws Exception { try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) { AllocationHelper.allocateNew(vector, count); - // verify that the validity buffer and value buffer have capacity for atleast 'count' elements. + // verify that the validity buffer and value buffer have capacity for at least 'count' elements. Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java index 11b8d4fadd164..79a4b249a8a89 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java @@ -108,7 +108,7 @@ public void testWriteMessageBufferAligned() throws IOException { // First message continuation, size, and 2 int values assertEquals(MessageSerializer.IPC_CONTINUATION_TOKEN, result.getInt()); - // mesage length is represented in little endian + // message length is represented in little endian result.order(ByteOrder.LITTLE_ENDIAN); assertEquals(8, result.getInt()); result.order(ByteOrder.nativeOrder()); @@ -117,7 +117,7 @@ public void testWriteMessageBufferAligned() throws IOException { // Second message continuation, size, 1 int value and 4 bytes padding assertEquals(MessageSerializer.IPC_CONTINUATION_TOKEN, result.getInt()); - // mesage length is represented in little endian + // message length is represented in little endian result.order(ByteOrder.LITTLE_ENDIAN); assertEquals(8, result.getInt()); result.order(ByteOrder.nativeOrder()); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java index 1b3d5eee35f88..084350410a4f5 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java @@ -177,7 +177,7 @@ public void testNullCheck() { vector.allocateNewSafe(); } }); - assertTrue(e.getMessage().contains("underlyingVector can not be null.")); + assertTrue(e.getMessage().contains("underlyingVector cannot be null.")); } /** From a4fae0230693f382d99910273c0c983ea3bc933a Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Thu, 14 Dec 2023 06:38:02 -0800 Subject: [PATCH 046/570] GH-39037: [Java] Remove 
(Contrib/Experimental) mention in Flight SQL (#39040) ### Rationale for this change Considering that Flight SQL has been present for a while and should be fairly stable, remove the `(Contrib/Experimental)` mention, which also shows up on the Maven Central UI pages, from the pom files. ### Are these changes tested? Tested locally, but there is no code change; it is only cosmetic. ### Are there any user-facing changes? None * Closes: #39037 Authored-by: Laurent Goujon Signed-off-by: David Li --- java/flight/flight-sql-jdbc-core/pom.xml | 2 +- java/flight/flight-sql-jdbc-driver/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml index cbeaa88f1e2f7..74a2f8d320f37 100644 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ b/java/flight/flight-sql-jdbc-core/pom.xml @@ -23,7 +23,7 @@ flight-sql-jdbc-core Arrow Flight SQL JDBC Driver Core - (Contrib/Experimental) Core implementation of JDBC driver based on Arrow Flight SQL. + Core implementation of JDBC driver based on Arrow Flight SQL. jar https://arrow.apache.org diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml index 84462f54950ba..d4ef1b4ea3b9b 100644 --- a/java/flight/flight-sql-jdbc-driver/pom.xml +++ b/java/flight/flight-sql-jdbc-driver/pom.xml @@ -23,7 +23,7 @@ flight-sql-jdbc-driver Arrow Flight SQL JDBC Driver - (Contrib/Experimental) A JDBC driver based on Arrow Flight SQL. + A JDBC driver based on Arrow Flight SQL. jar https://arrow.apache.org From 4e58f7ca0016c2b2d8a859a0c5965df3b15523e0 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 14 Dec 2023 15:25:28 -0300 Subject: [PATCH 047/570] GH-39119: [C++] Refactor the Azure FS tests and filesystem class instantiation (#39207) ### Rationale for this change This PR contains some unrelated improvements (like better docs) and some nitpicky fixes. The test refactoring makes it easier to see which environments the tests run on and allows the tests to be instantiated with different options in the future once we extend `AzureOptions`. ### What changes are included in this PR? - Random cleanups - Short namespace aliases - Refactoring of the tests (multiple environments, TYPED_TEST_SUITE, explicit preexisting data setup) - Cleanup of the `AzureOptions` class ### Are these changes tested? Yes. I created Azure Storage accounts to test with and without Hierarchical Namespace support. I also ran the tests in a shell without my environment variables to ensure the test-skipping behavior is correct. ### Are there any user-facing changes? Changes to `AzureOptions`, but since `AzureFileSystem` is not really used yet, these breaking changes won't be a problem.
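As a concrete illustration of the `AzureOptions` cleanup, here is a minimal usage sketch. It is not part of the PR: the helper function, account name, and key are placeholders, and the spellings (`AzureBackend::kAzurite`, `ConfigureAccountKeyCredential`, `MakeBlobServiceClient`, `MakeDataLakeServiceClient`) are taken from the diff that follows, assuming they are exposed through `azurefs.h`.

```cpp
// Sketch only: configure AzureOptions for a local Azurite emulator and obtain
// the underlying Azure SDK clients. Account name/key are placeholder values.
#include "arrow/filesystem/azurefs.h"
#include "arrow/result.h"
#include "arrow/status.h"

arrow::Status ConnectToAzuriteSketch() {
  arrow::fs::AzureOptions options;
  // kAzurite makes ConfigureAccountKeyCredential point the client URLs at the
  // local emulator endpoints (http://127.0.0.1:10000/...), per the diff below.
  options.backend = arrow::fs::AzureBackend::kAzurite;
  ARROW_RETURN_NOT_OK(options.ConfigureAccountKeyCredential(
      "devstoreaccount1", "placeholder-account-key"));
  // After configuration, the options object hands out the SDK clients that
  // AzureFileSystem::Impl::Make asks for when the filesystem is created.
  ARROW_ASSIGN_OR_RAISE(auto blob_service_client, options.MakeBlobServiceClient());
  ARROW_ASSIGN_OR_RAISE(auto datalake_service_client,
                        options.MakeDataLakeServiceClient());
  (void)blob_service_client;      // unused in this sketch
  (void)datalake_service_client;  // unused in this sketch
  return arrow::Status::OK();
}
```

Keeping SDK client construction behind `MakeBlobServiceClient()` and `MakeDataLakeServiceClient()` is what lets `AzureFileSystem::Impl::Make` stay agnostic of which credential kind was configured.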
* Closes: #39119 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/filesystem/azurefs.cc | 559 ++++---- cpp/src/arrow/filesystem/azurefs.h | 150 ++- cpp/src/arrow/filesystem/azurefs_internal.cc | 6 + cpp/src/arrow/filesystem/azurefs_internal.h | 3 - cpp/src/arrow/filesystem/azurefs_test.cc | 1269 ++++++++++-------- 5 files changed, 1094 insertions(+), 893 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 824a8fb531483..217885364089b 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -33,42 +33,85 @@ #include "arrow/util/logging.h" #include "arrow/util/string.h" -namespace arrow { -namespace fs { +namespace arrow::fs { + +namespace Blobs = Azure::Storage::Blobs; +namespace Core = Azure::Core; +namespace DataLake = Azure::Storage::Files::DataLake; +namespace Http = Azure::Core::Http; +namespace Storage = Azure::Storage; // ----------------------------------------------------------------------- // AzureOptions Implementation AzureOptions::AzureOptions() = default; +AzureOptions::~AzureOptions() = default; + bool AzureOptions::Equals(const AzureOptions& other) const { - return (account_dfs_url == other.account_dfs_url && - account_blob_url == other.account_blob_url && - credentials_kind == other.credentials_kind && - default_metadata == other.default_metadata); + // TODO(GH-38598): update here when more auth methods are added. + const bool equals = backend == other.backend && + default_metadata == other.default_metadata && + account_blob_url_ == other.account_blob_url_ && + account_dfs_url_ == other.account_dfs_url_ && + credential_kind_ == other.credential_kind_; + if (!equals) { + return false; + } + switch (credential_kind_) { + case CredentialKind::kAnonymous: + return true; + case CredentialKind::kStorageSharedKeyCredential: + return storage_shared_key_credential_->AccountName == + other.storage_shared_key_credential_->AccountName; + } + DCHECK(false); + return false; } -Status AzureOptions::ConfigureAccountKeyCredentials(const std::string& account_name, - const std::string& account_key) { - if (this->backend == AzureBackend::Azurite) { - account_blob_url = "http://127.0.0.1:10000/" + account_name + "/"; - account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/"; +Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name, + const std::string& account_key) { + if (this->backend == AzureBackend::kAzurite) { + account_blob_url_ = "http://127.0.0.1:10000/" + account_name + "/"; + account_dfs_url_ = "http://127.0.0.1:10000/" + account_name + "/"; } else { - account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/"; - account_blob_url = "https://" + account_name + ".blob.core.windows.net/"; + account_dfs_url_ = "https://" + account_name + ".dfs.core.windows.net/"; + account_blob_url_ = "https://" + account_name + ".blob.core.windows.net/"; } - storage_credentials_provider = - std::make_shared(account_name, - account_key); - credentials_kind = AzureCredentialsKind::StorageCredentials; + credential_kind_ = CredentialKind::kStorageSharedKeyCredential; + storage_shared_key_credential_ = + std::make_shared(account_name, account_key); return Status::OK(); } +Result> AzureOptions::MakeBlobServiceClient() + const { + switch (credential_kind_) { + case CredentialKind::kAnonymous: + break; + case CredentialKind::kStorageSharedKeyCredential: + return std::make_unique(account_blob_url_, + 
storage_shared_key_credential_); + } + return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); +} + +Result> +AzureOptions::MakeDataLakeServiceClient() const { + switch (credential_kind_) { + case CredentialKind::kAnonymous: + break; + case CredentialKind::kStorageSharedKeyCredential: + return std::make_unique( + account_dfs_url_, storage_shared_key_credential_); + } + return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); +} + namespace { -// An AzureFileSystem represents a single Azure storage -// account. AzureLocation describes a container and path within -// that storage account. +// An AzureFileSystem represents an Azure storage account. An AzureLocation describes a +// container in that storage account and a path within that container. struct AzureLocation { std::string all; std::string container; @@ -82,8 +125,8 @@ struct AzureLocation { // path_parts = [testdir, testfile.txt] if (internal::IsLikelyUri(string)) { return Status::Invalid( - "Expected an Azure object location of the form 'container/path...', got a URI: " - "'", + "Expected an Azure object location of the form 'container/path...'," + " got a URI: '", string, "'"); } auto first_sep = string.find_first_of(internal::kSep); @@ -130,14 +173,15 @@ struct AzureLocation { private: Status Validate() { auto status = internal::ValidateAbstractPathParts(path_parts); - if (!status.ok()) { - return Status::Invalid(status.message(), " in location ", all); - } else { - return status; - } + return status.ok() ? status : Status::Invalid(status.message(), " in location ", all); } }; +Status ExceptionToStatus(const std::string& prefix, + const Azure::Storage::StorageException& exception) { + return Status::IOError(prefix, " Azure Error: ", exception.what()); +} + Status PathNotFound(const AzureLocation& location) { return ::arrow::fs::internal::PathNotFound(location.all); } @@ -153,23 +197,41 @@ Status ValidateFileLocation(const AzureLocation& location) { if (location.path.empty()) { return NotAFile(location); } - ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(location.path)); - return Status::OK(); + return internal::AssertNoTrailingSlash(location.path); +} + +std::string_view BodyTextView(const Http::RawResponse& raw_response) { + const auto& body = raw_response.GetBody(); +#ifndef NDEBUG + auto& headers = raw_response.GetHeaders(); + auto content_type = headers.find("Content-Type"); + if (content_type != headers.end()) { + DCHECK_EQ(std::string_view{content_type->second}.substr(5), "text/"); + } +#endif + return std::string_view{reinterpret_cast(body.data()), body.size()}; } Status StatusFromErrorResponse(const std::string& url, - Azure::Core::Http::RawResponse* raw_response, + const Http::RawResponse& raw_response, const std::string& context) { - const auto& body = raw_response->GetBody(); // There isn't an Azure specification that response body on error // doesn't contain any binary data but we assume it. We hope that // error response body has useful information for the error. 
- std::string_view body_text(reinterpret_cast(body.data()), body.size()); - return Status::IOError(context, ": ", url, ": ", raw_response->GetReasonPhrase(), " (", - static_cast(raw_response->GetStatusCode()), + auto body_text = BodyTextView(raw_response); + return Status::IOError(context, ": ", url, ": ", raw_response.GetReasonPhrase(), " (", + static_cast(raw_response.GetStatusCode()), "): ", body_text); } +bool IsContainerNotFound(const Storage::StorageException& exception) { + if (exception.ErrorCode == "ContainerNotFound") { + DCHECK_EQ(exception.StatusCode, Http::HttpStatusCode::NotFound); + return true; + } + return false; +} + template std::string FormatValue(typename TypeTraits::CType value) { struct StringAppender { @@ -185,7 +247,7 @@ std::string FormatValue(typename TypeTraits::CType value) { } std::shared_ptr PropertiesToMetadata( - const Azure::Storage::Blobs::Models::BlobProperties& properties) { + const Blobs::Models::BlobProperties& properties) { auto metadata = std::make_shared(); // Not supported yet: // * properties.ObjectReplicationSourceProperties @@ -316,7 +378,7 @@ std::shared_ptr PropertiesToMetadata( class ObjectInputFile final : public io::RandomAccessFile { public: - ObjectInputFile(std::shared_ptr blob_client, + ObjectInputFile(std::shared_ptr blob_client, const io::IOContext& io_context, AzureLocation location, int64_t size = kNoSize) : blob_client_(std::move(blob_client)), @@ -334,11 +396,11 @@ class ObjectInputFile final : public io::RandomAccessFile { content_length_ = properties.Value.BlobSize; metadata_ = PropertiesToMetadata(properties.Value); return Status::OK(); - } catch (const Azure::Storage::StorageException& exception) { - if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (exception.StatusCode == Http::HttpStatusCode::NotFound) { return PathNotFound(location_); } - return internal::ExceptionToStatus( + return ExceptionToStatus( "GetProperties failed for '" + blob_client_->GetUrl() + "' with an unexpected Azure error. Cannot initialise an ObjectInputFile " "without knowing the file size.", @@ -411,20 +473,20 @@ class ObjectInputFile final : public io::RandomAccessFile { } // Read the desired range of bytes - Azure::Core::Http::HttpRange range{position, nbytes}; - Azure::Storage::Blobs::DownloadBlobToOptions download_options; + Http::HttpRange range{position, nbytes}; + Storage::Blobs::DownloadBlobToOptions download_options; download_options.Range = range; try { return blob_client_ ->DownloadTo(reinterpret_cast(out), nbytes, download_options) .Value.ContentRange.Length.Value(); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus("DownloadTo from '" + blob_client_->GetUrl() + - "' at position " + std::to_string(position) + - " for " + std::to_string(nbytes) + - " bytes failed with an Azure error. ReadAt " - "failed to read the required byte range.", - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("DownloadTo from '" + blob_client_->GetUrl() + + "' at position " + std::to_string(position) + " for " + + std::to_string(nbytes) + + " bytes failed with an Azure error. 
ReadAt " + "failed to read the required byte range.", + exception); } } @@ -459,7 +521,7 @@ class ObjectInputFile final : public io::RandomAccessFile { } private: - std::shared_ptr blob_client_; + std::shared_ptr blob_client_; const io::IOContext io_context_; AzureLocation location_; @@ -469,12 +531,11 @@ class ObjectInputFile final : public io::RandomAccessFile { std::shared_ptr metadata_; }; -Status CreateEmptyBlockBlob( - std::shared_ptr block_blob_client) { +Status CreateEmptyBlockBlob(std::shared_ptr block_blob_client) { try { block_blob_client->UploadFrom(nullptr, 0); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "UploadFrom failed for '" + block_blob_client->GetUrl() + "' with an unexpected Azure error. There is no existing blob at this " "location or the existing blob must be replaced so ObjectAppendStream must " @@ -484,12 +545,12 @@ Status CreateEmptyBlockBlob( return Status::OK(); } -Result GetBlockList( - std::shared_ptr block_blob_client) { +Result GetBlockList( + std::shared_ptr block_blob_client) { try { return block_blob_client->GetBlockList().Value; - } catch (Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (Storage::StorageException& exception) { + return ExceptionToStatus( "GetBlockList failed for '" + block_blob_client->GetUrl() + "' with an unexpected Azure error. Cannot write to a file without first " "fetching the existing block list.", @@ -497,19 +558,19 @@ Result GetBlockList( } } -Azure::Storage::Metadata ArrowMetadataToAzureMetadata( +Storage::Metadata ArrowMetadataToAzureMetadata( const std::shared_ptr& arrow_metadata) { - Azure::Storage::Metadata azure_metadata; + Storage::Metadata azure_metadata; for (auto key_value : arrow_metadata->sorted_pairs()) { azure_metadata[key_value.first] = key_value.second; } return azure_metadata; } -Status CommitBlockList( - std::shared_ptr block_blob_client, - const std::vector& block_ids, const Azure::Storage::Metadata& metadata) { - Azure::Storage::Blobs::CommitBlockListOptions options; +Status CommitBlockList(std::shared_ptr block_blob_client, + const std::vector& block_ids, + const Storage::Metadata& metadata) { + Blobs::CommitBlockListOptions options; options.Metadata = metadata; try { // CommitBlockList puts all block_ids in the latest element. That means in the case of @@ -517,8 +578,8 @@ Status CommitBlockList( // previously committed blocks. // https://learn.microsoft.com/en-us/rest/api/storageservices/put-block-list?tabs=microsoft-entra-id#request-body block_blob_client->CommitBlockList(block_ids, options); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "CommitBlockList failed for '" + block_blob_client->GetUrl() + "' with an unexpected Azure error. 
Committing is required to flush an " "output/append stream.", @@ -529,11 +590,10 @@ Status CommitBlockList( class ObjectAppendStream final : public io::OutputStream { public: - ObjectAppendStream( - std::shared_ptr block_blob_client, - const io::IOContext& io_context, const AzureLocation& location, - const std::shared_ptr& metadata, - const AzureOptions& options, int64_t size = kNoSize) + ObjectAppendStream(std::shared_ptr block_blob_client, + const io::IOContext& io_context, const AzureLocation& location, + const std::shared_ptr& metadata, + const AzureOptions& options, int64_t size = kNoSize) : block_blob_client_(std::move(block_blob_client)), io_context_(io_context), location_(location), @@ -560,11 +620,11 @@ class ObjectAppendStream final : public io::OutputStream { auto properties = block_blob_client_->GetProperties(); content_length_ = properties.Value.BlobSize; pos_ = content_length_; - } catch (const Azure::Storage::StorageException& exception) { - if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (exception.StatusCode == Http::HttpStatusCode::NotFound) { RETURN_NOT_OK(CreateEmptyBlockBlob(block_blob_client_)); } else { - return internal::ExceptionToStatus( + return ExceptionToStatus( "GetProperties failed for '" + block_blob_client_->GetUrl() + "' with an unexpected Azure error. Cannot initialise an " "ObjectAppendStream without knowing whether a file already exists at " @@ -634,7 +694,7 @@ class ObjectAppendStream final : public io::OutputStream { std::shared_ptr owned_buffer = nullptr) { RETURN_NOT_OK(CheckClosed("append")); auto append_data = reinterpret_cast(data); - Azure::Core::IO::MemoryBodyStream block_content(append_data, nbytes); + Core::IO::MemoryBodyStream block_content(append_data, nbytes); if (block_content.Length() == 0) { return Status::OK(); } @@ -657,13 +717,13 @@ class ObjectAppendStream final : public io::OutputStream { // if the blob was previously created with one block, with id `00001-arrow` then the // next block we append will conflict with that, and cause corruption. new_block_id += "-arrow"; - new_block_id = Azure::Core::Convert::Base64Encode( + new_block_id = Core::Convert::Base64Encode( std::vector(new_block_id.begin(), new_block_id.end())); try { block_blob_client_->StageBlock(new_block_id, block_content); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "StageBlock failed for '" + block_blob_client_->GetUrl() + "' new_block_id: '" + new_block_id + "' with an unexpected Azure error. 
Staging new blocks is fundamental to " @@ -676,7 +736,7 @@ class ObjectAppendStream final : public io::OutputStream { return Status::OK(); } - std::shared_ptr block_blob_client_; + std::shared_ptr block_blob_client_; const io::IOContext io_context_; const AzureLocation location_; @@ -684,7 +744,7 @@ class ObjectAppendStream final : public io::OutputStream { int64_t pos_ = 0; int64_t content_length_ = kNoSize; std::vector block_ids_; - Azure::Storage::Metadata metadata_; + Storage::Metadata metadata_; }; } // namespace @@ -693,27 +753,31 @@ class ObjectAppendStream final : public io::OutputStream { // AzureFilesystem Implementation class AzureFileSystem::Impl { - public: + private: io::IOContext io_context_; - std::unique_ptr - datalake_service_client_; - std::unique_ptr blob_service_client_; AzureOptions options_; - internal::HierarchicalNamespaceDetector hierarchical_namespace_; - explicit Impl(AzureOptions options, io::IOContext io_context) - : io_context_(io_context), options_(std::move(options)) {} + std::unique_ptr datalake_service_client_; + std::unique_ptr blob_service_client_; + internal::HierarchicalNamespaceDetector hns_detector_; - Status Init() { - blob_service_client_ = std::make_unique( - options_.account_blob_url, options_.storage_credentials_provider); - datalake_service_client_ = - std::make_unique( - options_.account_dfs_url, options_.storage_credentials_provider); - RETURN_NOT_OK(hierarchical_namespace_.Init(datalake_service_client_.get())); - return Status::OK(); - } + Impl(AzureOptions options, io::IOContext io_context) + : io_context_(std::move(io_context)), options_(std::move(options)) {} + public: + static Result> Make(AzureOptions options, + io::IOContext io_context) { + auto self = std::unique_ptr( + new AzureFileSystem::Impl(std::move(options), std::move(io_context))); + ARROW_ASSIGN_OR_RAISE(self->blob_service_client_, + self->options_.MakeBlobServiceClient()); + ARROW_ASSIGN_OR_RAISE(self->datalake_service_client_, + self->options_.MakeDataLakeServiceClient()); + RETURN_NOT_OK(self->hns_detector_.Init(self->datalake_service_client_.get())); + return self; + } + + io::IOContext& io_context() { return io_context_; } const AzureOptions& options() const { return options_; } public: @@ -722,12 +786,10 @@ class AzureFileSystem::Impl { info.set_path(location.all); if (location.container.empty()) { - // The location is invalid if the container is empty but not - // path. + // The location is invalid if the container is empty but the path is not. DCHECK(location.path.empty()); - // The location must refer to the root of the Azure storage - // account. This is a directory, and there isn't any extra - // metadata to fetch. + // This location must be derived from the root path. FileInfo should describe it + // as a directory and there isn't any extra metadata to fetch. 
info.set_type(FileType::Directory); return info; } @@ -739,20 +801,22 @@ class AzureFileSystem::Impl { auto properties = container_client.GetProperties(); info.set_type(FileType::Directory); info.set_mtime( - std::chrono::system_clock::time_point(properties.Value.LastModified)); + std::chrono::system_clock::time_point{properties.Value.LastModified}); return info; - } catch (const Azure::Storage::StorageException& exception) { - if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (IsContainerNotFound(exception)) { info.set_type(FileType::NotFound); return info; } - return internal::ExceptionToStatus( + return ExceptionToStatus( "GetProperties for '" + container_client.GetUrl() + "' failed with an unexpected Azure error. GetFileInfo is unable to " "determine whether the container exists.", exception); } } + + // There is a path to search within the container. auto file_client = datalake_service_client_->GetFileSystemClient(location.container) .GetFileClient(location.path); try { @@ -763,6 +827,8 @@ class AzureFileSystem::Impl { // For a path with a trailing slash a hierarchical namespace may return a blob // with that trailing slash removed. For consistency with flat namespace and // other filesystems we chose to return NotFound. + // + // NOTE(felipecrv): could this be an empty directory marker? info.set_type(FileType::NotFound); return info; } else { @@ -770,12 +836,12 @@ class AzureFileSystem::Impl { info.set_size(properties.Value.FileSize); } info.set_mtime( - std::chrono::system_clock::time_point(properties.Value.LastModified)); + std::chrono::system_clock::time_point{properties.Value.LastModified}); return info; - } catch (const Azure::Storage::StorageException& exception) { - if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (exception.StatusCode == Http::HttpStatusCode::NotFound) { ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (hierarchical_namespace_enabled) { // If the hierarchical namespace is enabled, then the storage account will have // explicit directories. Neither a file nor a directory was found. @@ -784,12 +850,10 @@ class AzureFileSystem::Impl { } // On flat namespace accounts there are no real directories. Directories are only // implied by using `/` in the blob name. - Azure::Storage::Blobs::ListBlobsOptions list_blob_options; - + Blobs::ListBlobsOptions list_blob_options; // If listing the prefix `path.path_to_file` with trailing slash returns at least // one result then `path` refers to an implied directory. - auto prefix = internal::EnsureTrailingSlash(location.path); - list_blob_options.Prefix = prefix; + list_blob_options.Prefix = internal::EnsureTrailingSlash(location.path); // We only need to know if there is at least one result, so minimise page size // for efficiency. list_blob_options.PageSizeHint = 1; @@ -798,21 +862,19 @@ class AzureFileSystem::Impl { auto paged_list_result = blob_service_client_->GetBlobContainerClient(location.container) .ListBlobs(list_blob_options); - if (paged_list_result.Blobs.size() > 0) { - info.set_type(FileType::Directory); - } else { - info.set_type(FileType::NotFound); - } + auto file_type = paged_list_result.Blobs.size() > 0 ? 
FileType::Directory + : FileType::NotFound; + info.set_type(file_type); return info; - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "ListBlobs for '" + prefix + + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( + "ListBlobs for '" + *list_blob_options.Prefix + "' failed with an unexpected Azure error. GetFileInfo is unable to " "determine whether the path should be considered an implied directory.", exception); } } - return internal::ExceptionToStatus( + return ExceptionToStatus( "GetProperties for '" + file_client.GetUrl() + "' failed with an unexpected " "Azure error. GetFileInfo is unable to determine whether the path exists.", @@ -822,9 +884,8 @@ class AzureFileSystem::Impl { private: template - Status VisitContainers(const Azure::Core::Context& context, - OnContainer&& on_container) const { - Azure::Storage::Blobs::ListBlobContainersOptions options; + Status VisitContainers(const Core::Context& context, OnContainer&& on_container) const { + Blobs::ListBlobContainersOptions options; try { auto container_list_response = blob_service_client_->ListBlobContainers(options, context); @@ -834,14 +895,14 @@ class AzureFileSystem::Impl { RETURN_NOT_OK(on_container(container)); } } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus("Failed to list account containers.", exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to list account containers.", exception); } return Status::OK(); } - static FileInfo FileInfoFromBlob(const std::string& container, - const Azure::Storage::Blobs::Models::BlobItem& blob) { + static FileInfo FileInfoFromBlob(std::string_view container, + const Blobs::Models::BlobItem& blob) { auto path = internal::ConcatAbstractPath(container, blob.Name); if (internal::HasTrailingSlash(blob.Name)) { return DirectoryFileInfoFromPath(path); @@ -852,7 +913,7 @@ class AzureFileSystem::Impl { return info; } - static FileInfo DirectoryFileInfoFromPath(const std::string& path) { + static FileInfo DirectoryFileInfoFromPath(std::string_view path) { return FileInfo{std::string{internal::RemoveTrailingSlash(path)}, FileType::Directory}; } @@ -870,13 +931,13 @@ class AzureFileSystem::Impl { /// \pre container_client is the client for the container named like the first /// segment of select.base_dir. 
Status GetFileInfoWithSelectorFromContainer( - const Azure::Storage::Blobs::BlobContainerClient& container_client, - const Azure::Core::Context& context, Azure::Nullable page_size_hint, - const FileSelector& select, FileInfoVector* acc_results) { + const Blobs::BlobContainerClient& container_client, const Core::Context& context, + Azure::Nullable page_size_hint, const FileSelector& select, + FileInfoVector* acc_results) { ARROW_ASSIGN_OR_RAISE(auto base_location, AzureLocation::FromString(select.base_dir)); bool found = false; - Azure::Storage::Blobs::ListBlobsOptions options; + Blobs::ListBlobsOptions options; if (internal::IsEmptyPath(base_location.path)) { // If the base_dir is the root of the container, then we want to list all blobs in // the container and the Prefix should be empty and not even include the trailing @@ -887,7 +948,7 @@ class AzureFileSystem::Impl { options.Prefix = internal::EnsureTrailingSlash(base_location.path); } options.PageSizeHint = page_size_hint; - options.Include = Azure::Storage::Blobs::Models::ListBlobsIncludeFlags::Metadata; + options.Include = Blobs::Models::ListBlobsIncludeFlags::Metadata; auto recurse = [&](const std::string& blob_prefix) noexcept -> Status { if (select.recursive && select.max_recursion > 0) { @@ -903,15 +964,14 @@ class AzureFileSystem::Impl { return Status::OK(); }; - auto process_blob = - [&](const Azure::Storage::Blobs::Models::BlobItem& blob) noexcept { - // blob.Name has trailing slash only when Prefix is an empty - // directory marker blob for the directory we're listing - // from, and we should skip it. - if (!internal::HasTrailingSlash(blob.Name)) { - acc_results->push_back(FileInfoFromBlob(base_location.container, blob)); - } - }; + auto process_blob = [&](const Blobs::Models::BlobItem& blob) noexcept { + // blob.Name has trailing slash only when Prefix is an empty + // directory marker blob for the directory we're listing + // from, and we should skip it. + if (!internal::HasTrailingSlash(blob.Name)) { + acc_results->push_back(FileInfoFromBlob(base_location.container, blob)); + } + }; auto process_prefix = [&](const std::string& prefix) noexcept -> Status { const auto path = internal::ConcatAbstractPath(base_location.container, prefix); acc_results->push_back(DirectoryFileInfoFromPath(path)); @@ -964,14 +1024,13 @@ class AzureFileSystem::Impl { RETURN_NOT_OK(process_prefix(list_response.BlobPrefixes[blob_prefix_index])); } } - } catch (const Azure::Storage::StorageException& exception) { - if (exception.ErrorCode == "ContainerNotFound") { + } catch (const Storage::StorageException& exception) { + if (IsContainerNotFound(exception)) { found = false; } else { - return internal::ExceptionToStatus( - "Failed to list blobs in a directory: " + select.base_dir + ": " + - container_client.GetUrl(), - exception); + return ExceptionToStatus("Failed to list blobs in a directory: " + + select.base_dir + ": " + container_client.GetUrl(), + exception); } } @@ -981,7 +1040,7 @@ class AzureFileSystem::Impl { } public: - Status GetFileInfoWithSelector(const Azure::Core::Context& context, + Status GetFileInfoWithSelector(const Core::Context& context, Azure::Nullable page_size_hint, const FileSelector& select, FileInfoVector* acc_results) { @@ -991,29 +1050,28 @@ class AzureFileSystem::Impl { // Without a container, the base_location is equivalent to the filesystem // root -- `/`. FileSelector::allow_not_found doesn't matter in this case // because the root always exists. 
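      // Usage sketch (a minimal illustration, assuming `fs` is an AzureFileSystem
      // obtained from AzureFileSystem::Make(options)): listing the account root
      // enumerates every container as a directory.
      //
      //   FileSelector select;
      //   select.base_dir = "";  // the account root, i.e. "/"
      //   ARROW_ASSIGN_OR_RAISE(auto infos, fs->GetFileInfo(select));
      //   // Each FileInfo in `infos` describes a container with FileType::Directory.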
- auto on_container = - [&](const Azure::Storage::Blobs::Models::BlobContainerItem& container) { - // Deleted containers are not listed by ListContainers. - DCHECK(!container.IsDeleted); - - // Every container is considered a directory. - FileInfo info{container.Name, FileType::Directory}; - info.set_mtime( - std::chrono::system_clock::time_point{container.Details.LastModified}); - acc_results->push_back(std::move(info)); - - // Recurse into containers (subdirectories) if requested. - if (select.recursive && select.max_recursion > 0) { - FileSelector sub_select; - sub_select.base_dir = container.Name; - sub_select.allow_not_found = true; - sub_select.recursive = true; - sub_select.max_recursion = select.max_recursion - 1; - ARROW_RETURN_NOT_OK(GetFileInfoWithSelector(context, page_size_hint, - sub_select, acc_results)); - } - return Status::OK(); - }; + auto on_container = [&](const Blobs::Models::BlobContainerItem& container) { + // Deleted containers are not listed by ListContainers. + DCHECK(!container.IsDeleted); + + // Every container is considered a directory. + FileInfo info{container.Name, FileType::Directory}; + info.set_mtime( + std::chrono::system_clock::time_point{container.Details.LastModified}); + acc_results->push_back(std::move(info)); + + // Recurse into containers (subdirectories) if requested. + if (select.recursive && select.max_recursion > 0) { + FileSelector sub_select; + sub_select.base_dir = container.Name; + sub_select.allow_not_found = true; + sub_select.recursive = true; + sub_select.max_recursion = select.max_recursion - 1; + ARROW_RETURN_NOT_OK( + GetFileInfoWithSelector(context, page_size_hint, sub_select, acc_results)); + } + return Status::OK(); + }; return VisitContainers(context, std::move(on_container)); } @@ -1026,7 +1084,7 @@ class AzureFileSystem::Impl { Result> OpenInputFile(const AzureLocation& location, AzureFileSystem* fs) { RETURN_NOT_OK(ValidateFileLocation(location)); - auto blob_client = std::make_shared( + auto blob_client = std::make_shared( blob_service_client_->GetBlobContainerClient(location.container) .GetBlobClient(location.path)); @@ -1046,7 +1104,7 @@ class AzureFileSystem::Impl { } ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(info.path())); RETURN_NOT_OK(ValidateFileLocation(location)); - auto blob_client = std::make_shared( + auto blob_client = std::make_shared( blob_service_client_->GetBlobContainerClient(location.container) .GetBlobClient(location.path)); @@ -1070,19 +1128,18 @@ class AzureFileSystem::Impl { return Status::OK(); } else { return StatusFromErrorResponse( - container_client.GetUrl(), response.RawResponse.get(), + container_client.GetUrl(), *response.RawResponse, "Failed to create a container: " + location.container); } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to create a container: " + location.container + ": " + - container_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to create a container: " + location.container + + ": " + container_client.GetUrl(), + exception); } } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (!hierarchical_namespace_enabled) { // Without hierarchical namespace enabled Azure blob storage has no directories. // Therefore we can't, and don't need to create one. 
Simply creating a blob with `/` @@ -1098,15 +1155,13 @@ class AzureFileSystem::Impl { if (response.Value.Created) { return Status::OK(); } else { - return StatusFromErrorResponse(directory_client.GetUrl(), - response.RawResponse.get(), + return StatusFromErrorResponse(directory_client.GetUrl(), *response.RawResponse, "Failed to create a directory: " + location.path); } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to create a directory: " + location.path + ": " + - directory_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to create a directory: " + location.path + ": " + + directory_client.GetUrl(), + exception); } } @@ -1119,15 +1174,14 @@ class AzureFileSystem::Impl { blob_service_client_->GetBlobContainerClient(location.container); try { container_client.CreateIfNotExists(); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to create a container: " + location.container + " (" + - container_client.GetUrl() + ")", - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to create a container: " + location.container + + " (" + container_client.GetUrl() + ")", + exception); } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (!hierarchical_namespace_enabled) { // Without hierarchical namespace enabled Azure blob storage has no directories. // Therefore we can't, and don't need to create one. Simply creating a blob with `/` @@ -1141,11 +1195,10 @@ class AzureFileSystem::Impl { .GetDirectoryClient(location.path); try { directory_client.CreateIfNotExists(); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to create a directory: " + location.path + " (" + - directory_client.GetUrl() + ")", - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to create a directory: " + location.path + " (" + + directory_client.GetUrl() + ")", + exception); } } @@ -1158,7 +1211,7 @@ class AzureFileSystem::Impl { AzureFileSystem* fs) { RETURN_NOT_OK(ValidateFileLocation(location)); - auto block_blob_client = std::make_shared( + auto block_blob_client = std::make_shared( blob_service_client_->GetBlobContainerClient(location.container) .GetBlockBlobClient(location.path)); @@ -1180,7 +1233,7 @@ class AzureFileSystem::Impl { bool missing_dir_ok) { auto container_client = blob_service_client_->GetBlobContainerClient(location.container); - Azure::Storage::Blobs::ListBlobsOptions options; + Blobs::ListBlobsOptions options; if (!location.path.empty()) { options.Prefix = internal::EnsureTrailingSlash(location.path); } @@ -1200,19 +1253,17 @@ class AzureFileSystem::Impl { continue; } auto batch = container_client.CreateBatch(); - std::vector> + std::vector> deferred_responses; for (const auto& blob_item : list_response.Blobs) { deferred_responses.push_back(batch.DeleteBlob(blob_item.Name)); } try { container_client.SubmitBatch(batch); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to delete blobs in a directory: " + location.path + ": " + - container_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to delete blobs 
in a directory: " + + location.path + ": " + container_client.GetUrl(), + exception); } std::vector failed_blob_names; for (size_t i = 0; i < deferred_responses.size(); ++i) { @@ -1221,7 +1272,7 @@ class AzureFileSystem::Impl { try { auto delete_result = deferred_response.GetResponse(); success = delete_result.Value.Deleted; - } catch (const Azure::Storage::StorageException& exception) { + } catch (const Storage::StorageException& exception) { success = false; } if (!success) { @@ -1240,11 +1291,10 @@ class AzureFileSystem::Impl { } } } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to list blobs in a directory: " + location.path + ": " + - container_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to list blobs in a directory: " + location.path + + ": " + container_client.GetUrl(), + exception); } return Status::OK(); } @@ -1264,19 +1314,18 @@ class AzureFileSystem::Impl { return Status::OK(); } else { return StatusFromErrorResponse( - container_client.GetUrl(), response.RawResponse.get(), + container_client.GetUrl(), *response.RawResponse, "Failed to delete a container: " + location.container); } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to delete a container: " + location.container + ": " + - container_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to delete a container: " + location.container + + ": " + container_client.GetUrl(), + exception); } } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (hierarchical_namespace_enabled) { auto directory_client = datalake_service_client_->GetFileSystemClient(location.container) @@ -1287,14 +1336,13 @@ class AzureFileSystem::Impl { return Status::OK(); } else { return StatusFromErrorResponse( - directory_client.GetUrl(), response.RawResponse.get(), + directory_client.GetUrl(), *response.RawResponse, "Failed to delete a directory: " + location.path); } - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( - "Failed to delete a directory: " + location.path + ": " + - directory_client.GetUrl(), - exception); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus("Failed to delete a directory: " + location.path + ": " + + directory_client.GetUrl(), + exception); } } else { return DeleteDirContentsWithoutHierarchicalNamespace(location, @@ -1308,7 +1356,7 @@ class AzureFileSystem::Impl { } ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hierarchical_namespace_.Enabled(location.container)); + hns_detector_.Enabled(location.container)); if (hierarchical_namespace_enabled) { auto file_system_client = datalake_service_client_->GetFileSystemClient(location.container); @@ -1322,8 +1370,8 @@ class AzureFileSystem::Impl { file_system_client.GetDirectoryClient(path.Name); try { sub_directory_client.DeleteRecursive(); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "Failed to delete a sub directory: " + location.container + internal::kSep + path.Name + ": " + sub_directory_client.GetUrl(), exception); @@ -1332,8 +1380,8 @@ class 
AzureFileSystem::Impl { auto sub_file_client = file_system_client.GetFileClient(path.Name); try { sub_file_client.Delete(); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "Failed to delete a sub file: " + location.container + internal::kSep + path.Name + ": " + sub_file_client.GetUrl(), exception); @@ -1341,15 +1389,13 @@ class AzureFileSystem::Impl { } } } - } catch (const Azure::Storage::StorageException& exception) { - if (missing_dir_ok && - exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + } catch (const Storage::StorageException& exception) { + if (missing_dir_ok && exception.StatusCode == Http::HttpStatusCode::NotFound) { return Status::OK(); } else { - return internal::ExceptionToStatus( - "Failed to delete directory contents: " + location.path + ": " + - directory_client.GetUrl(), - exception); + return ExceptionToStatus("Failed to delete directory contents: " + + location.path + ": " + directory_client.GetUrl(), + exception); } } return Status::OK(); @@ -1371,8 +1417,8 @@ class AzureFileSystem::Impl { .GetUrl(); try { dest_blob_client.CopyFromUri(src_url); - } catch (const Azure::Storage::StorageException& exception) { - return internal::ExceptionToStatus( + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( "Failed to copy a blob. (" + src_url + " -> " + dest_blob_client.GetUrl() + ")", exception); } @@ -1380,6 +1426,17 @@ class AzureFileSystem::Impl { } }; +AzureFileSystem::AzureFileSystem(std::unique_ptr&& impl) + : FileSystem(impl->io_context()), impl_(std::move(impl)) { + default_async_is_sync_ = false; +} + +Result> AzureFileSystem::Make( + const AzureOptions& options, const io::IOContext& io_context) { + ARROW_ASSIGN_OR_RAISE(auto impl, AzureFileSystem::Impl::Make(options, io_context)); + return std::shared_ptr(new AzureFileSystem(std::move(impl))); +} + const AzureOptions& AzureFileSystem::options() const { return impl_->options(); } bool AzureFileSystem::Equals(const FileSystem& other) const { @@ -1399,7 +1456,7 @@ Result AzureFileSystem::GetFileInfo(const std::string& path) { } Result AzureFileSystem::GetFileInfo(const FileSelector& select) { - Azure::Core::Context context; + Core::Context context; Azure::Nullable page_size_hint; // unspecified FileInfoVector results; RETURN_NOT_OK( @@ -1478,18 +1535,4 @@ Result> AzureFileSystem::OpenAppendStream( return impl_->OpenAppendStream(location, metadata, false, this); } -Result> AzureFileSystem::Make( - const AzureOptions& options, const io::IOContext& io_context) { - std::shared_ptr ptr(new AzureFileSystem(options, io_context)); - RETURN_NOT_OK(ptr->impl_->Init()); - return ptr; -} - -AzureFileSystem::AzureFileSystem(const AzureOptions& options, - const io::IOContext& io_context) - : FileSystem(io_context), impl_(std::make_unique(options, io_context)) { - default_async_is_sync_ = false; -} - -} // namespace fs -} // namespace arrow +} // namespace arrow::fs diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index b2865b059ef6e..1266aa2d02b86 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -25,90 +25,118 @@ #include "arrow/util/macros.h" #include "arrow/util/uri.h" -namespace Azure { -namespace Core { -namespace Credentials { - +namespace Azure::Core::Credentials { class TokenCredential; +} -} // namespace Credentials -} // namespace Core -namespace Storage { 
- +namespace Azure::Storage { class StorageSharedKeyCredential; +} -} // namespace Storage -} // namespace Azure - -namespace arrow { -namespace fs { - -enum class AzureCredentialsKind : int8_t { - /// Anonymous access (no credentials used), public - Anonymous, - /// Use explicitly-provided access key pair - StorageCredentials, - /// Use ServicePrincipleCredentials - ServicePrincipleCredentials, - /// Use Sas Token to authenticate - Sas, - /// Use Connection String - ConnectionString -}; +namespace Azure::Storage::Blobs { +class BlobServiceClient; +} + +namespace Azure::Storage::Files::DataLake { +class DataLakeServiceClient; +} -enum class AzureBackend : bool { - /// Official Azure Remote Backend - Azure, - /// Local Simulated Storage - Azurite +namespace arrow::fs { + +enum class AzureBackend { + /// \brief Official Azure Remote Backend + kAzure, + /// \brief Local Simulated Storage + kAzurite }; /// Options for the AzureFileSystem implementation. struct ARROW_EXPORT AzureOptions { - std::string account_dfs_url; - std::string account_blob_url; - AzureBackend backend = AzureBackend::Azure; - AzureCredentialsKind credentials_kind = AzureCredentialsKind::Anonymous; + /// \brief The backend to connect to: Azure or Azurite (for testing). + AzureBackend backend = AzureBackend::kAzure; - std::string sas_token; - std::string connection_string; - std::shared_ptr - storage_credentials_provider; - std::shared_ptr - service_principle_credentials_provider; + // TODO(GH-38598): Add support for more auth methods. + // std::string connection_string; + // std::string sas_token; /// \brief Default metadata for OpenOutputStream. /// /// This will be ignored if non-empty metadata is passed to OpenOutputStream. std::shared_ptr default_metadata; + private: + std::string account_blob_url_; + std::string account_dfs_url_; + + enum class CredentialKind { + kAnonymous, + kStorageSharedKeyCredential, + } credential_kind_ = CredentialKind::kAnonymous; + + std::shared_ptr + storage_shared_key_credential_; + + public: AzureOptions(); + ~AzureOptions(); - Status ConfigureAccountKeyCredentials(const std::string& account_name, - const std::string& account_key); + Status ConfigureAccountKeyCredential(const std::string& account_name, + const std::string& account_key); bool Equals(const AzureOptions& other) const; + + const std::string& AccountBlobUrl() const { return account_blob_url_; } + const std::string& AccountDfsUrl() const { return account_dfs_url_; } + + Result> + MakeBlobServiceClient() const; + + Result> + MakeDataLakeServiceClient() const; }; -/// \brief Azure-backed FileSystem implementation for ABFS and ADLS. +/// \brief FileSystem implementation backed by Azure Blob Storage (ABS) [1] and +/// Azure Data Lake Storage Gen2 (ADLS Gen2) [2]. +/// +/// ADLS Gen2 isn't a dedicated service or account type. It's a set of capabilities that +/// support high throughput analytic workloads, built on Azure Blob Storage. All the data +/// ingested via the ADLS Gen2 APIs is persisted as blobs in the storage account. +/// ADLS Gen2 provides filesystem semantics, file-level security, and Hadoop +/// compatibility. ADLS Gen1 exists as a separate object that will retired on 2024-02-29 +/// and new ADLS accounts use Gen2 instead. /// -/// ABFS (Azure Blob Storage - https://azure.microsoft.com/en-us/products/storage/blobs/) -/// object-based cloud storage system. +/// ADLS Gen2 and Blob APIs can operate on the same data, but there are +/// some limitations [3]. 
The ones that are relevant to this +/// implementation are listed here: /// -/// ADLS (Azure Data Lake Storage - -/// https://azure.microsoft.com/en-us/products/storage/data-lake-storage/) -/// is a scalable data storage system designed for big-data applications. -/// ADLS provides filesystem semantics, file-level security, and Hadoop -/// compatibility. Gen1 exists as a separate object that will retired -/// on Feb 29, 2024. New ADLS accounts will use Gen2 instead, which is -/// implemented on top of ABFS. +/// - You can't use Blob APIs, and ADLS APIs to write to the same instance of a file. If +/// you write to a file by using ADLS APIs then that file's blocks won't be visible +/// to calls to the GetBlockList Blob API. The only exception is when you're +/// overwriting. +/// - When you use the ListBlobs operation without specifying a delimiter, the results +/// include both directories and blobs. If you choose to use a delimiter, use only a +/// forward slash (/) -- the only supported delimiter. +/// - If you use the DeleteBlob API to delete a directory, that directory is deleted only +/// if it's empty. This means that you can't use the Blob API delete directories +/// recursively. /// -/// TODO: GH-18014 Complete the internal implementation -/// and review the documentation +/// [1]: https://azure.microsoft.com/en-us/products/storage/blobs +/// [2]: https://azure.microsoft.com/en-us/products/storage/data-lake-storage +/// [3]: +/// https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-known-issues class ARROW_EXPORT AzureFileSystem : public FileSystem { + private: + class Impl; + std::unique_ptr impl_; + + explicit AzureFileSystem(std::unique_ptr&& impl); + public: ~AzureFileSystem() override = default; + static Result> Make( + const AzureOptions& options, const io::IOContext& = io::default_io_context()); + std::string type_name() const override { return "abfs"; } /// Return the original Azure options when constructing the filesystem @@ -152,16 +180,6 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem { Result> OpenAppendStream( const std::string& path, const std::shared_ptr& metadata = {}) override; - - static Result> Make( - const AzureOptions& options, const io::IOContext& = io::default_io_context()); - - private: - AzureFileSystem(const AzureOptions& options, const io::IOContext& io_context); - - class Impl; - std::unique_ptr impl_; }; -} // namespace fs -} // namespace arrow +} // namespace arrow::fs diff --git a/cpp/src/arrow/filesystem/azurefs_internal.cc b/cpp/src/arrow/filesystem/azurefs_internal.cc index 3e545d670cb04..39c3fb23e3cfd 100644 --- a/cpp/src/arrow/filesystem/azurefs_internal.cc +++ b/cpp/src/arrow/filesystem/azurefs_internal.cc @@ -23,11 +23,17 @@ namespace arrow::fs::internal { +namespace { + +// TODO(GH-38772): Remove azurefs_internal.h/.cc by moving the detector to +// azurefs.cc (which contains a private copy of this helper function already). 
Status ExceptionToStatus(const std::string& prefix, const Azure::Storage::StorageException& exception) { return Status::IOError(prefix, " Azure Error: ", exception.what()); } +} // namespace + Status HierarchicalNamespaceDetector::Init( Azure::Storage::Files::DataLake::DataLakeServiceClient* datalake_service_client) { datalake_service_client_ = datalake_service_client; diff --git a/cpp/src/arrow/filesystem/azurefs_internal.h b/cpp/src/arrow/filesystem/azurefs_internal.h index c3da96239a18f..92592cf164f5a 100644 --- a/cpp/src/arrow/filesystem/azurefs_internal.h +++ b/cpp/src/arrow/filesystem/azurefs_internal.h @@ -25,9 +25,6 @@ namespace arrow::fs::internal { -Status ExceptionToStatus(const std::string& prefix, - const Azure::Storage::StorageException& exception); - class HierarchicalNamespaceDetector { public: Status Init( diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 792c63b209402..463ff4e8daf3d 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -36,6 +36,7 @@ #include "arrow/filesystem/azurefs.h" #include "arrow/filesystem/azurefs_internal.h" +#include #include #include @@ -63,6 +64,7 @@ namespace arrow { using internal::TemporaryDir; namespace fs { +using internal::ConcatAbstractPath; namespace { namespace bp = boost::process; @@ -71,56 +73,133 @@ using ::testing::Not; using ::testing::NotNull; namespace Blobs = Azure::Storage::Blobs; -namespace Files = Azure::Storage::Files; +namespace Core = Azure::Core; +namespace DataLake = Azure::Storage::Files::DataLake; -auto const* kLoremIpsum = R"""( -Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor -incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis -nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. -Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu -fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in -culpa qui officia deserunt mollit anim id est laborum. -)"""; +class BaseAzureEnv : public ::testing::Environment { + protected: + std::string account_name_; + std::string account_key_; + + BaseAzureEnv(std::string account_name, std::string account_key) + : account_name_(std::move(account_name)), account_key_(std::move(account_key)) {} -class AzuriteEnv : public ::testing::Environment { public: - AzuriteEnv() { - account_name_ = "devstoreaccount1"; - account_key_ = - "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/" - "KBHBeksoGMGw=="; - auto exe_path = bp::search_path("azurite"); - if (exe_path.empty()) { - auto error = std::string("Could not find Azurite emulator."); - status_ = Status::Invalid(error); - return; + const std::string& account_name() const { return account_name_; } + const std::string& account_key() const { return account_key_; } + + virtual AzureBackend backend() const = 0; + + virtual bool WithHierarchicalNamespace() const { return false; } + + virtual Result GetDebugLogSize() { return 0; } + virtual Status DumpDebugLog(int64_t position) { + return Status::NotImplemented("BaseAzureEnv::DumpDebugLog"); + } +}; + +template +class AzureEnvImpl : public BaseAzureEnv { + private: + /// \brief Factory function that registers the singleton instance as a global test + /// environment. Must be called only once per implementation (see GetInstance()). 
+ /// + /// Every BaseAzureEnv implementation defines a static and parameter-less member + /// function called Make() that returns a Result>. + /// This templated function performs the following steps: + /// + /// 1) Calls AzureEnvClass::Make() to get an instance of AzureEnvClass. + /// 2) Passes ownership of the AzureEnvClass instance to the testing environment. + /// 3) Returns a Result wrapping the raw heap-allocated pointer. + static Result MakeAndAddToGlobalTestEnvironment() { + ARROW_ASSIGN_OR_RAISE(auto env, AzureEnvClass::Make()); + auto* heap_ptr = env.release(); + ::testing::AddGlobalTestEnvironment(heap_ptr); + return heap_ptr; + } + + protected: + using BaseAzureEnv::BaseAzureEnv; + + /// \brief Create an AzureEnvClass instance from environment variables. + /// + /// Reads the account name and key from the environment variables. This can be + /// used in BaseAzureEnv implementations that don't need to do any additional + /// setup to create the singleton instance (e.g. AzureFlatNSEnv, + /// AzureHierarchicalNSEnv). + static Result> MakeFromEnvVars( + const std::string& account_name_var, const std::string& account_key_var) { + const auto account_name = std::getenv(account_name_var.c_str()); + const auto account_key = std::getenv(account_key_var.c_str()); + if (!account_name && !account_key) { + return Status::Cancelled(account_name_var + " and " + account_key_var + + " are not set. Skipping tests."); } - auto temp_dir_ = *TemporaryDir::Make("azurefs-test-"); - auto debug_log_path_result = temp_dir_->path().Join("debug.log"); - if (!debug_log_path_result.ok()) { - status_ = debug_log_path_result.status(); - return; + // If only one of the variables is set. Don't cancel tests, + // fail with a Status::Invalid. + if (!account_name) { + return Status::Invalid(account_name_var + " not set while " + account_key_var + + " is set."); } - debug_log_path_ = *debug_log_path_result; - server_process_ = - bp::child(boost::this_process::environment(), exe_path, "--silent", "--location", - temp_dir_->path().ToString(), "--debug", debug_log_path_.ToString()); - if (!(server_process_.valid() && server_process_.running())) { - auto error = "Could not start Azurite emulator."; - server_process_.terminate(); - server_process_.wait(); - status_ = Status::Invalid(error); - return; + if (!account_key) { + return Status::Invalid(account_key_var + " not set while " + account_name_var + + " is set."); } - status_ = Status::OK(); + return std::unique_ptr{new AzureEnvClass(account_name, account_key)}; } + public: + static Result GetInstance() { + // Ensure MakeAndAddToGlobalTestEnvironment() is called only once by storing the + // Result in a static variable. 
+ static auto singleton_env = MakeAndAddToGlobalTestEnvironment(); + return singleton_env; + } + + AzureBackend backend() const final { return AzureEnvClass::kBackend; } +}; + +class AzuriteEnv : public AzureEnvImpl { + private: + std::unique_ptr temp_dir_; + arrow::internal::PlatformFilename debug_log_path_; + bp::child server_process_; + + using AzureEnvImpl::AzureEnvImpl; + + public: + static const AzureBackend kBackend = AzureBackend::kAzurite; + ~AzuriteEnv() override { server_process_.terminate(); server_process_.wait(); } - Result GetDebugLogSize() { + static Result> Make() { + auto self = std::unique_ptr( + new AzuriteEnv("devstoreaccount1", + "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/" + "K1SZFPTOtr/KBHBeksoGMGw==")); + auto exe_path = bp::search_path("azurite"); + if (exe_path.empty()) { + return Status::Invalid("Could not find Azurite emulator."); + } + ARROW_ASSIGN_OR_RAISE(self->temp_dir_, TemporaryDir::Make("azurefs-test-")); + ARROW_ASSIGN_OR_RAISE(self->debug_log_path_, + self->temp_dir_->path().Join("debug.log")); + auto server_process = bp::child( + boost::this_process::environment(), exe_path, "--silent", "--location", + self->temp_dir_->path().ToString(), "--debug", self->debug_log_path_.ToString()); + if (!server_process.valid() || !server_process.running()) { + server_process.terminate(); + server_process.wait(); + return Status::Invalid("Could not start Azurite emulator."); + } + self->server_process_ = std::move(server_process); + return self; + } + + Result GetDebugLogSize() override { ARROW_ASSIGN_OR_RAISE(auto exists, arrow::internal::FileExists(debug_log_path_)); if (!exists) { return 0; @@ -131,7 +210,7 @@ class AzuriteEnv : public ::testing::Environment { return arrow::internal::FileTell(file_descriptor.fd()); } - Status DumpDebugLog(int64_t position = 0) { + Status DumpDebugLog(int64_t position) override { ARROW_ASSIGN_OR_RAISE(auto exists, arrow::internal::FileExists(debug_log_path_)); if (!exists) { return Status::OK(); @@ -157,25 +236,35 @@ class AzuriteEnv : public ::testing::Environment { std::cerr << std::endl; return Status::OK(); } +}; - const std::string& account_name() const { return account_name_; } - const std::string& account_key() const { return account_key_; } - const Status status() const { return status_; } - +class AzureFlatNSEnv : public AzureEnvImpl { private: - std::string account_name_; - std::string account_key_; - bp::child server_process_; - Status status_; - std::unique_ptr temp_dir_; - arrow::internal::PlatformFilename debug_log_path_; + using AzureEnvImpl::AzureEnvImpl; + + public: + static const AzureBackend kBackend = AzureBackend::kAzure; + + static Result> Make() { + return MakeFromEnvVars("AZURE_FLAT_NAMESPACE_ACCOUNT_NAME", + "AZURE_FLAT_NAMESPACE_ACCOUNT_KEY"); + } }; -auto* azurite_env = ::testing::AddGlobalTestEnvironment(new AzuriteEnv); +class AzureHierarchicalNSEnv : public AzureEnvImpl { + private: + using AzureEnvImpl::AzureEnvImpl; -AzuriteEnv* GetAzuriteEnv() { - return ::arrow::internal::checked_cast(azurite_env); -} + public: + static const AzureBackend kBackend = AzureBackend::kAzure; + + static Result> Make() { + return MakeFromEnvVars("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME", + "AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY"); + } + + bool WithHierarchicalNamespace() const final { return true; } +}; // Placeholder tests // TODO: GH-18014 Remove once a proper test is added @@ -193,44 +282,110 @@ TEST(AzureFileSystem, OptionsCompare) { EXPECT_TRUE(options.Equals(options)); } -class 
AzureFileSystemTest : public ::testing::Test { +struct PreexistingData { + public: + using RNG = std::mt19937_64; + + public: + const std::string container_name; + static constexpr char const* kObjectName = "test-object-name"; + + static constexpr char const* kLoremIpsum = R"""( +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor +incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis +nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu +fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in +culpa qui officia deserunt mollit anim id est laborum. +)"""; + public: + explicit PreexistingData(RNG& rng) : container_name{RandomContainerName(rng)} {} + + // Creates a path by concatenating the container name and the stem. + std::string ContainerPath(std::string_view stem) const { + return ConcatAbstractPath(container_name, stem); + } + + std::string ObjectPath() const { return ContainerPath(kObjectName); } + std::string NotFoundObjectPath() const { return ContainerPath("not-found"); } + + std::string RandomDirectoryPath(RNG& rng) const { + return ContainerPath(RandomChars(32, rng)); + } + + // Utilities + static std::string RandomContainerName(RNG& rng) { return RandomChars(32, rng); } + + static std::string RandomChars(int count, RNG& rng) { + auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789"); + std::uniform_int_distribution d(0, static_cast(fillers.size()) - 1); + std::string s; + std::generate_n(std::back_inserter(s), count, [&] { return fillers[d(rng)]; }); + return s; + } + + static int RandomIndex(int end, RNG& rng) { + return std::uniform_int_distribution(0, end - 1)(rng); + } + + static std::string RandomLine(int lineno, int width, RNG& rng) { + auto line = std::to_string(lineno) + ": "; + line += RandomChars(width - static_cast(line.size()) - 1, rng); + line += '\n'; + return line; + } +}; + +class TestAzureFileSystem : public ::testing::Test { + protected: + // Set in constructor + std::mt19937_64 rng_; + + // Set in SetUp() + int64_t debug_log_start_ = 0; + bool set_up_succeeded_ = false; + AzureOptions options_; + std::shared_ptr fs_; std::unique_ptr blob_service_client_; - std::unique_ptr datalake_service_client_; - AzureOptions options_; - std::mt19937_64 generator_; - std::string container_name_; - bool suite_skipped_ = false; + std::unique_ptr datalake_service_client_; + + public: + TestAzureFileSystem() : rng_(std::random_device()()) {} - AzureFileSystemTest() : generator_(std::random_device()()) {} + virtual Result GetAzureEnv() const = 0; - virtual Result MakeOptions() = 0; + static Result MakeOptions(BaseAzureEnv* env) { + AzureOptions options; + options.backend = env->backend(); + ARROW_EXPECT_OK( + options.ConfigureAccountKeyCredential(env->account_name(), env->account_key())); + return options; + } void SetUp() override { - auto options = MakeOptions(); - if (options.ok()) { - options_ = *options; + auto make_options = [this]() -> Result { + ARROW_ASSIGN_OR_RAISE(auto env, GetAzureEnv()); + EXPECT_THAT(env, NotNull()); + ARROW_ASSIGN_OR_RAISE(debug_log_start_, env->GetDebugLogSize()); + return MakeOptions(env); + }; + auto options_res = make_options(); + if (options_res.status().IsCancelled()) { + GTEST_SKIP() << options_res.status().message(); } else { - suite_skipped_ = true; - GTEST_SKIP() << options.status().message(); + EXPECT_OK_AND_ASSIGN(options_, options_res); } - // 
Stop-gap solution before GH-39119 is fixed. - container_name_ = "z" + RandomChars(31); - blob_service_client_ = std::make_unique( - options_.account_blob_url, options_.storage_credentials_provider); - datalake_service_client_ = std::make_unique( - options_.account_dfs_url, options_.storage_credentials_provider); - ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_)); - auto container_client = CreateContainer(container_name_); - auto blob_client = container_client.GetBlockBlobClient(PreexistingObjectName()); - blob_client.UploadFrom(reinterpret_cast(kLoremIpsum), - strlen(kLoremIpsum)); + ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_)); + EXPECT_OK_AND_ASSIGN(blob_service_client_, options_.MakeBlobServiceClient()); + EXPECT_OK_AND_ASSIGN(datalake_service_client_, options_.MakeDataLakeServiceClient()); + set_up_succeeded_ = true; } void TearDown() override { - if (!suite_skipped_) { + if (set_up_succeeded_) { auto containers = blob_service_client_->ListBlobContainers(); for (auto container : containers.BlobContainers) { auto container_client = @@ -238,6 +393,13 @@ class AzureFileSystemTest : public ::testing::Test { container_client.DeleteIfExists(); } } + if (HasFailure()) { + // XXX: This may not include all logs in the target test because + // Azurite doesn't flush debug logs immediately... You may want + // to check the log manually... + EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv()); + ARROW_IGNORE_EXPR(env->DumpDebugLog(debug_log_start_)); + } } Blobs::BlobContainerClient CreateContainer(const std::string& name) { @@ -254,54 +416,20 @@ class AzureFileSystemTest : public ::testing::Test { return blob_client; } - std::string PreexistingContainerName() const { return container_name_; } - - std::string PreexistingContainerPath() const { - return PreexistingContainerName() + '/'; - } - - static std::string PreexistingObjectName() { return "test-object-name"; } - - std::string PreexistingObjectPath() const { - return PreexistingContainerPath() + PreexistingObjectName(); - } - - std::string NotFoundObjectPath() { return PreexistingContainerPath() + "not-found"; } - - std::string RandomLine(int lineno, std::size_t width) { - auto line = std::to_string(lineno) + ": "; - line += RandomChars(width - line.size() - 1); - line += '\n'; - return line; - } - - std::size_t RandomIndex(std::size_t end) { - return std::uniform_int_distribution(0, end - 1)(generator_); - } - - std::string RandomChars(std::size_t count) { - auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789"); - std::uniform_int_distribution d(0, fillers.size() - 1); - std::string s; - std::generate_n(std::back_inserter(s), count, [&] { return fillers[d(generator_)]; }); - return s; - } - - std::string RandomContainerName() { return RandomChars(32); } - - std::string RandomDirectoryName() { return RandomChars(32); } - - void UploadLines(const std::vector& lines, const char* path_to_file, + void UploadLines(const std::vector& lines, const std::string& path, int total_size) { - const auto path = PreexistingContainerPath() + path_to_file; ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); const auto all_lines = std::accumulate(lines.begin(), lines.end(), std::string("")); ASSERT_OK(output->Write(all_lines)); ASSERT_OK(output->Close()); } - void RunGetFileInfoObjectWithNestedStructureTest(); - void RunGetFileInfoObjectTest(); + PreexistingData SetUpPreexistingData() { + PreexistingData data(rng_); + auto container_client = CreateContainer(data.container_name); + CreateBlob(container_client, 
data.kObjectName, PreexistingData::kLoremIpsum); + return data; + } struct HierarchicalPaths { std::string container; @@ -310,15 +438,12 @@ class AzureFileSystemTest : public ::testing::Test { }; // Need to use "void" as the return type to use ASSERT_* in this method. - void CreateHierarchicalData(HierarchicalPaths& paths) { - const auto container_path = RandomContainerName(); - const auto directory_path = - internal::ConcatAbstractPath(container_path, RandomDirectoryName()); - const auto sub_directory_path = - internal::ConcatAbstractPath(directory_path, "new-sub"); - const auto sub_blob_path = - internal::ConcatAbstractPath(sub_directory_path, "sub.txt"); - const auto top_blob_path = internal::ConcatAbstractPath(directory_path, "top.txt"); + void CreateHierarchicalData(HierarchicalPaths* paths) { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + const auto sub_directory_path = ConcatAbstractPath(directory_path, "new-sub"); + const auto sub_blob_path = ConcatAbstractPath(sub_directory_path, "sub.txt"); + const auto top_blob_path = ConcatAbstractPath(directory_path, "top.txt"); ASSERT_OK(fs_->CreateDir(sub_directory_path, true)); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(sub_blob_path)); ASSERT_OK(output->Write(std::string_view("sub"))); @@ -327,15 +452,15 @@ class AzureFileSystemTest : public ::testing::Test { ASSERT_OK(output->Write(std::string_view("top"))); ASSERT_OK(output->Close()); - AssertFileInfo(fs_.get(), container_path, FileType::Directory); + AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); AssertFileInfo(fs_.get(), directory_path, FileType::Directory); AssertFileInfo(fs_.get(), sub_directory_path, FileType::Directory); AssertFileInfo(fs_.get(), sub_blob_path, FileType::File); AssertFileInfo(fs_.get(), top_blob_path, FileType::File); - paths.container = container_path; - paths.directory = directory_path; - paths.sub_paths = { + paths->container = data.container_name; + paths->directory = directory_path; + paths->sub_paths = { sub_directory_path, sub_blob_path, top_blob_path, @@ -362,7 +487,7 @@ class AzureFileSystemTest : public ::testing::Test { } void AssertInfoAllContainersRecursive(const std::vector& infos) { - ASSERT_EQ(infos.size(), 14); + ASSERT_EQ(infos.size(), 12); AssertFileInfo(infos[0], "container", FileType::Directory); AssertFileInfo(infos[1], "container/emptydir", FileType::Directory); AssertFileInfo(infos[2], "container/otherdir", FileType::Directory); @@ -377,202 +502,336 @@ class AzureFileSystemTest : public ::testing::Test { strlen(kSubData)); AssertFileInfo(infos[10], "container/somefile", FileType::File, strlen(kSomeData)); AssertFileInfo(infos[11], "empty-container", FileType::Directory); - AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory); - AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File); } -}; -class AzuriteFileSystemTest : public AzureFileSystemTest { - Result MakeOptions() override { - EXPECT_THAT(GetAzuriteEnv(), NotNull()); - ARROW_EXPECT_OK(GetAzuriteEnv()->status()); - ARROW_ASSIGN_OR_RAISE(debug_log_start_, GetAzuriteEnv()->GetDebugLogSize()); - AzureOptions options; - options.backend = AzureBackend::Azurite; - ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials( - GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key())); - return options; + bool WithHierarchicalNamespace() const { + EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv()); + return env->WithHierarchicalNamespace(); } - void TearDown() override { - 
AzureFileSystemTest::TearDown(); - if (HasFailure()) { - // XXX: This may not include all logs in the target test because - // Azurite doesn't flush debug logs immediately... You may want - // to check the log manually... - ARROW_IGNORE_EXPR(GetAzuriteEnv()->DumpDebugLog(debug_log_start_)); + // Tests that are called from more than one implementation of TestAzureFileSystem + + void TestDetectHierarchicalNamespace(); + void TestGetFileInfoObject(); + void TestGetFileInfoObjectWithNestedStructure(); + + void TestDeleteDirSuccessEmpty() { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + + if (WithHierarchicalNamespace()) { + ASSERT_OK(fs_->CreateDir(directory_path, true)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); + ASSERT_OK(fs_->DeleteDir(directory_path)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + } else { + // There is only virtual directory without hierarchical namespace + // support. So the CreateDir() and DeleteDir() do nothing. + ASSERT_OK(fs_->CreateDir(directory_path)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + ASSERT_OK(fs_->DeleteDir(directory_path)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); } } - int64_t debug_log_start_ = 0; -}; + void TestCreateDirSuccessContainerAndDirectory() { + auto data = SetUpPreexistingData(); + const auto path = data.RandomDirectoryPath(rng_); + ASSERT_OK(fs_->CreateDir(path, false)); + if (WithHierarchicalNamespace()) { + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + } else { + // There is only virtual directory without hierarchical namespace + // support. So the CreateDir() does nothing. + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + } + } -class AzureFlatNamespaceFileSystemTest : public AzureFileSystemTest { - Result MakeOptions() override { - AzureOptions options; - const auto account_key = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_KEY"); - const auto account_name = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_NAME"); - if (account_key && account_name) { - RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, account_key)); - return options; + void TestCreateDirRecursiveSuccessContainerOnly() { + auto container_name = PreexistingData::RandomContainerName(rng_); + ASSERT_OK(fs_->CreateDir(container_name, true)); + arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); + } + + void TestCreateDirRecursiveSuccessDirectoryOnly() { + auto data = SetUpPreexistingData(); + const auto parent = data.RandomDirectoryPath(rng_); + const auto path = ConcatAbstractPath(parent, "new-sub"); + ASSERT_OK(fs_->CreateDir(path, true)); + if (WithHierarchicalNamespace()) { + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); + } else { + // There is only virtual directory without hierarchical namespace + // support. So the CreateDir() does nothing. + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); } - return Status::Cancelled( - "Connection details not provided for a real flat namespace " - "account."); } -}; -// How to enable this test: -// -// You need an Azure account. You should be able to create a free -// account at https://azure.microsoft.com/en-gb/free/ . You should be -// able to create a storage account through the portal Web UI. 
-// -// See also the official document how to create a storage account: -// https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account -// -// A few suggestions on configuration: -// -// * Use Standard general-purpose v2 not premium -// * Use LRS redundancy -// * Obviously you need to enable hierarchical namespace. -// * Set the default access tier to hot -// * SFTP, NFS and file shares are not required. -class AzureHierarchicalNamespaceFileSystemTest : public AzureFileSystemTest { - Result MakeOptions() override { - AzureOptions options; - const auto account_key = std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY"); - const auto account_name = std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME"); - if (account_key && account_name) { - RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name, account_key)); - return options; + void TestCreateDirRecursiveSuccessContainerAndDirectory() { + auto data = SetUpPreexistingData(); + const auto parent = data.RandomDirectoryPath(rng_); + const auto path = ConcatAbstractPath(parent, "new-sub"); + ASSERT_OK(fs_->CreateDir(path, true)); + if (WithHierarchicalNamespace()) { + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); + } else { + // There is only virtual directory without hierarchical namespace + // support. So the CreateDir() does nothing. + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); } - return Status::Cancelled( - "Connection details not provided for a real hierarchical namespace " - "account."); } -}; -TEST_F(AzureFlatNamespaceFileSystemTest, DetectHierarchicalNamespace) { - auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); - ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_OK_AND_EQ(false, hierarchical_namespace.Enabled(PreexistingContainerName())); -} + void TestDeleteDirContentsSuccessNonexistent() { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); + arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DetectHierarchicalNamespace) { - auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); - ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_OK_AND_EQ(true, hierarchical_namespace.Enabled(PreexistingContainerName())); -} + void TestDeleteDirContentsFailureNonexistent() { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); + } +}; -TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespace) { - auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); - ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_OK_AND_EQ(false, hierarchical_namespace.Enabled(PreexistingContainerName())); -} +void TestAzureFileSystem::TestDetectHierarchicalNamespace() { + // Check the environments are implemented and injected here correctly. 
+ auto expected = WithHierarchicalNamespace(); -TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespaceFailsWithMissingContainer) { + auto data = SetUpPreexistingData(); auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_NOT_OK(hierarchical_namespace.Enabled("nonexistent-container")); + ASSERT_OK_AND_EQ(expected, hierarchical_namespace.Enabled(data.container_name)); } -TEST_F(AzuriteFileSystemTest, GetFileInfoAccount) { - AssertFileInfo(fs_.get(), "", FileType::Directory); - - // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://")); -} - -TEST_F(AzuriteFileSystemTest, GetFileInfoContainer) { - AssertFileInfo(fs_.get(), PreexistingContainerName(), FileType::Directory); +void TestAzureFileSystem::TestGetFileInfoObject() { + auto data = SetUpPreexistingData(); + auto object_properties = + blob_service_client_->GetBlobContainerClient(data.container_name) + .GetBlobClient(data.kObjectName) + .GetProperties() + .Value; - AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound); + AssertFileInfo(fs_.get(), data.ObjectPath(), FileType::File, + std::chrono::system_clock::time_point{object_properties.LastModified}, + static_cast(object_properties.BlobSize)); // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + PreexistingContainerName())); + ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + std::string{data.kObjectName})); } -void AzureFileSystemTest::RunGetFileInfoObjectWithNestedStructureTest() { +void TestAzureFileSystem::TestGetFileInfoObjectWithNestedStructure() { + auto data = SetUpPreexistingData(); // Adds detailed tests to handle cases of different edge cases // with directory naming conventions (e.g. with and without slashes). - constexpr auto kObjectName = "test-object-dir/some_other_dir/another_dir/foo"; - ASSERT_OK_AND_ASSIGN( - auto output, - fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName, /*metadata=*/{})); - const std::string_view data(kLoremIpsum); - ASSERT_OK(output->Write(data)); + const std::string kObjectName = "test-object-dir/some_other_dir/another_dir/foo"; + ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(data.ContainerPath(kObjectName), + /*metadata=*/{})); + const std::string_view lorem_ipsum(PreexistingData::kLoremIpsum); + ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); // 0 is immediately after "/" lexicographically, ensure that this doesn't // cause unexpected issues. 
- ASSERT_OK_AND_ASSIGN(output, - fs_->OpenOutputStream( - PreexistingContainerPath() + "test-object-dir/some_other_dir0", - /*metadata=*/{})); - ASSERT_OK(output->Write(data)); - ASSERT_OK(output->Close()); ASSERT_OK_AND_ASSIGN( - output, fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName + "0", + output, fs_->OpenOutputStream(data.ContainerPath("test-object-dir/some_other_dir0"), /*metadata=*/{})); - ASSERT_OK(output->Write(data)); + ASSERT_OK(output->Write(lorem_ipsum)); + ASSERT_OK(output->Close()); + ASSERT_OK_AND_ASSIGN(output, + fs_->OpenOutputStream(data.ContainerPath(kObjectName + "0"), + /*metadata=*/{})); + ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName, FileType::File); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName + "/", - FileType::NotFound); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir", - FileType::Directory); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/", + AssertFileInfo(fs_.get(), data.ContainerPath(kObjectName), FileType::File); + AssertFileInfo(fs_.get(), data.ContainerPath(kObjectName) + "/", FileType::NotFound); + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir"), FileType::Directory); + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir") + "/", FileType::Directory); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/some_other_dir", + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_dir"), FileType::Directory); - AssertFileInfo(fs_.get(), - PreexistingContainerPath() + "test-object-dir/some_other_dir/", + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_dir") + "/", FileType::Directory); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-di", - FileType::NotFound); - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/some_other_di", + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-di"), FileType::NotFound); + AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_di"), FileType::NotFound); + + if (WithHierarchicalNamespace()) { + datalake_service_client_->GetFileSystemClient(data.container_name) + .GetDirectoryClient("test-empty-object-dir") + .Create(); + + AssertFileInfo(fs_.get(), data.ContainerPath("test-empty-object-dir"), + FileType::Directory); + } } -TEST_F(AzuriteFileSystemTest, GetFileInfoObjectWithNestedStructure) { - RunGetFileInfoObjectWithNestedStructureTest(); +template +class AzureFileSystemTestImpl : public TestAzureFileSystem { + public: + using TestAzureFileSystem::TestAzureFileSystem; + + Result GetAzureEnv() const final { return AzureEnvClass::GetInstance(); } +}; + +// How to enable the non-Azurite tests: +// +// You need an Azure account. You should be able to create a free account [1]. +// Through the portal Web UI, you should create a storage account [2]. +// +// A few suggestions on configuration: +// +// * Use Standard general-purpose v2 not premium +// * Use LRS redundancy +// * Set the default access tier to hot +// * SFTP, NFS and file shares are not required. +// +// You must not enable Hierarchical Namespace on the storage account used for +// TestAzureFlatNSFileSystem, but you must enable it on the storage account +// used for TestAzureHierarchicalNSFileSystem. 
+// +// The credentials should be placed in the correct environment variables: +// +// * AZURE_FLAT_NAMESPACE_ACCOUNT_NAME +// * AZURE_FLAT_NAMESPACE_ACCOUNT_KEY +// * AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME +// * AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY +// +// [1]: https://azure.microsoft.com/en-gb/free/ +// [2]: +// https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account +using TestAzureFlatNSFileSystem = AzureFileSystemTestImpl; +using TestAzureHierarchicalNSFileSystem = AzureFileSystemTestImpl; +using TestAzuriteFileSystem = AzureFileSystemTestImpl; + +// Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/ HNS) + +template +using AzureFileSystemTestOnAllEnvs = AzureFileSystemTestImpl; + +using AllEnvironments = + ::testing::Types; + +TYPED_TEST_SUITE(AzureFileSystemTestOnAllEnvs, AllEnvironments); + +TYPED_TEST(AzureFileSystemTestOnAllEnvs, DetectHierarchicalNamespace) { + this->TestDetectHierarchicalNamespace(); } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, GetFileInfoObjectWithNestedStructure) { - RunGetFileInfoObjectWithNestedStructureTest(); - datalake_service_client_->GetFileSystemClient(PreexistingContainerName()) - .GetDirectoryClient("test-empty-object-dir") - .Create(); +TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObject) { + this->TestGetFileInfoObject(); +} - AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-empty-object-dir", - FileType::Directory); +TYPED_TEST(AzureFileSystemTestOnAllEnvs, DeleteDirSuccessEmpty) { + this->TestDeleteDirSuccessEmpty(); } -void AzureFileSystemTest::RunGetFileInfoObjectTest() { - auto object_properties = - blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) - .GetBlobClient(PreexistingObjectName()) - .GetProperties() - .Value; +TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObjectWithNestedStructure) { + this->TestGetFileInfoObjectWithNestedStructure(); +} - AssertFileInfo(fs_.get(), PreexistingObjectPath(), FileType::File, - std::chrono::system_clock::time_point(object_properties.LastModified), - static_cast(object_properties.BlobSize)); +TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirSuccessContainerAndDirectory) { + this->TestCreateDirSuccessContainerAndDirectory(); +} + +TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerOnly) { + this->TestCreateDirRecursiveSuccessContainerOnly(); +} + +TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessDirectoryOnly) { + this->TestCreateDirRecursiveSuccessDirectoryOnly(); +} + +TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerAndDirectory) { + this->TestCreateDirRecursiveSuccessContainerAndDirectory(); +} + +// Tests using a real storage account *with Hierarchical Namespace enabled* + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirFailureNonexistent) { + auto data = SetUpPreexistingData(); + const auto path = data.RandomDirectoryPath(rng_); + ASSERT_RAISES(IOError, fs_->DeleteDir(path)); +} + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveBlob) { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + const auto blob_path = ConcatAbstractPath(directory_path, "hello.txt"); + ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); + ASSERT_OK(output->Write(std::string_view("hello"))); + ASSERT_OK(output->Close()); + arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::File); + ASSERT_OK(fs_->DeleteDir(directory_path)); + 
arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound); +} + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveDirectory) { + auto data = SetUpPreexistingData(); + const auto parent = data.RandomDirectoryPath(rng_); + const auto path = ConcatAbstractPath(parent, "new-sub"); + ASSERT_OK(fs_->CreateDir(path, true)); + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); + ASSERT_OK(fs_->DeleteDir(parent)); + arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); +} + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsSuccessExist) { + auto preexisting_data = SetUpPreexistingData(); + HierarchicalPaths paths; + CreateHierarchicalData(&paths); + ASSERT_OK(fs_->DeleteDirContents(paths.directory)); + arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::Directory); + for (const auto& sub_path : paths.sub_paths) { + arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound); + } +} + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsSuccessNonexistent) { + this->TestDeleteDirContentsSuccessNonexistent(); +} + +TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsFailureNonexistent) { + this->TestDeleteDirContentsFailureNonexistent(); +} + +// Tests using Azurite (the local Azure emulator) + +TEST_F(TestAzuriteFileSystem, DetectHierarchicalNamespaceFailsWithMissingContainer) { + auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); + ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); + ASSERT_RAISES(IOError, hierarchical_namespace.Enabled("nonexistent-container")); +} + +TEST_F(TestAzuriteFileSystem, GetFileInfoAccount) { + AssertFileInfo(fs_.get(), "", FileType::Directory); // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + PreexistingObjectName())); + ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://")); } -TEST_F(AzuriteFileSystemTest, GetFileInfoObject) { RunGetFileInfoObjectTest(); } +TEST_F(TestAzuriteFileSystem, GetFileInfoContainer) { + auto data = SetUpPreexistingData(); + AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); -TEST_F(AzureHierarchicalNamespaceFileSystemTest, GetFileInfoObject) { - RunGetFileInfoObjectTest(); + AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound); + + // URI + ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + data.container_name)); } -TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) { +TEST_F(TestAzuriteFileSystem, GetFileInfoSelector) { SetUpSmallFileSystemTree(); FileSelector select; @@ -581,11 +840,10 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) { // Root dir select.base_dir = ""; ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); - ASSERT_EQ(infos.size(), 3); + ASSERT_EQ(infos.size(), 2); ASSERT_EQ(infos, SortedInfos(infos)); AssertFileInfo(infos[0], "container", FileType::Directory); AssertFileInfo(infos[1], "empty-container", FileType::Directory); - AssertFileInfo(infos[2], container_name_, FileType::Directory); // Empty container select.base_dir = "empty-container"; @@ -641,7 +899,7 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) { ASSERT_EQ(infos.size(), 4); } -TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorRecursive) { +TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorRecursive) { SetUpSmallFileSystemTree(); FileSelector select; @@ -651,7 +909,7 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorRecursive) { // Root 
dir select.base_dir = ""; ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); - ASSERT_EQ(infos.size(), 14); + ASSERT_EQ(infos.size(), 12); ASSERT_EQ(infos, SortedInfos(infos)); AssertInfoAllContainersRecursive(infos); @@ -699,7 +957,7 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorRecursive) { AssertFileInfo(infos[3], "container/otherdir/1/2/3/otherfile", FileType::File, 10); } -TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorExplicitImplicitDirDedup) { +TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorExplicitImplicitDirDedup) { { auto container = CreateContainer("container"); CreateBlob(container, "mydir/emptydir1/"); @@ -746,137 +1004,60 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorExplicitImplicitDirDedup) { AssertFileInfo(infos[0], "container/mydir/nonemptydir2/somefile", FileType::File); } -TEST_F(AzuriteFileSystemTest, CreateDirFailureNoContainer) { +TEST_F(TestAzuriteFileSystem, CreateDirFailureNoContainer) { ASSERT_RAISES(Invalid, fs_->CreateDir("", false)); } -TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerOnly) { - auto container_name = RandomContainerName(); +TEST_F(TestAzuriteFileSystem, CreateDirSuccessContainerOnly) { + auto container_name = PreexistingData::RandomContainerName(rng_); ASSERT_OK(fs_->CreateDir(container_name, false)); arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); } -TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerAndDirectory) { - const auto path = PreexistingContainerPath() + RandomDirectoryName(); - ASSERT_OK(fs_->CreateDir(path, false)); - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirSuccessContainerAndDirectory) { - const auto path = PreexistingContainerPath() + RandomDirectoryName(); - ASSERT_OK(fs_->CreateDir(path, false)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); -} - -TEST_F(AzuriteFileSystemTest, CreateDirFailureDirectoryWithMissingContainer) { +TEST_F(TestAzuriteFileSystem, CreateDirFailureDirectoryWithMissingContainer) { const auto path = std::string("not-a-container/new-directory"); ASSERT_RAISES(IOError, fs_->CreateDir(path, false)); } -TEST_F(AzuriteFileSystemTest, CreateDirRecursiveFailureNoContainer) { +TEST_F(TestAzuriteFileSystem, CreateDirRecursiveFailureNoContainer) { ASSERT_RAISES(Invalid, fs_->CreateDir("", true)); } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirRecursiveSuccessContainerOnly) { - auto container_name = RandomContainerName(); - ASSERT_OK(fs_->CreateDir(container_name, true)); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); -} - -TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessContainerOnly) { - auto container_name = RandomContainerName(); - ASSERT_OK(fs_->CreateDir(container_name, true)); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, CreateDirRecursiveSuccessDirectoryOnly) { - const auto parent = PreexistingContainerPath() + RandomDirectoryName(); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); -} - -TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessDirectoryOnly) { - const auto parent = 
PreexistingContainerPath() + RandomDirectoryName(); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, - CreateDirRecursiveSuccessContainerAndDirectory) { - auto container_name = RandomContainerName(); - const auto parent = internal::ConcatAbstractPath(container_name, RandomDirectoryName()); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); +TEST_F(TestAzuriteFileSystem, CreateDirUri) { + ASSERT_RAISES( + Invalid, + fs_->CreateDir("abfs://" + PreexistingData::RandomContainerName(rng_), true)); } -TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessContainerAndDirectory) { - auto container_name = RandomContainerName(); - const auto parent = internal::ConcatAbstractPath(container_name, RandomDirectoryName()); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); -} - -TEST_F(AzuriteFileSystemTest, CreateDirUri) { - ASSERT_RAISES(Invalid, fs_->CreateDir("abfs://" + RandomContainerName(), true)); -} - -TEST_F(AzuriteFileSystemTest, DeleteDirSuccessContainer) { - const auto container_name = RandomContainerName(); +TEST_F(TestAzuriteFileSystem, DeleteDirSuccessContainer) { + const auto container_name = PreexistingData::RandomContainerName(rng_); ASSERT_OK(fs_->CreateDir(container_name)); arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); ASSERT_OK(fs_->DeleteDir(container_name)); arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::NotFound); } -TEST_F(AzuriteFileSystemTest, DeleteDirSuccessEmpty) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() and DeleteDir() do nothing. - ASSERT_OK(fs_->CreateDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); -} - -TEST_F(AzuriteFileSystemTest, DeleteDirSuccessNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); +TEST_F(TestAzuriteFileSystem, DeleteDirSuccessNonexistent) { + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); // There is only virtual directory without hierarchical namespace // support. So the DeleteDir() for nonexistent directory does nothing. 
ASSERT_OK(fs_->DeleteDir(directory_path)); arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); } -TEST_F(AzuriteFileSystemTest, DeleteDirSuccessHaveBlobs) { +TEST_F(TestAzuriteFileSystem, DeleteDirSuccessHaveBlobs) { #ifdef __APPLE__ GTEST_SKIP() << "This test fails by an Azurite problem: " "https://github.com/Azure/Azurite/pull/2302"; #endif - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); // We must use 257 or more blobs here to test pagination of ListBlobs(). // Because we can't add 257 or more delete blob requests to one SubmitBatch(). int64_t n_blobs = 257; for (int64_t i = 0; i < n_blobs; ++i) { - const auto blob_path = - internal::ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); + const auto blob_path = ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); ASSERT_OK(output->Write(std::string_view(std::to_string(i)))); ASSERT_OK(output->Close()); @@ -884,62 +1065,24 @@ TEST_F(AzuriteFileSystemTest, DeleteDirSuccessHaveBlobs) { } ASSERT_OK(fs_->DeleteDir(directory_path)); for (int64_t i = 0; i < n_blobs; ++i) { - const auto blob_path = - internal::ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); + const auto blob_path = ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound); } } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirSuccessEmpty) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_OK(fs_->CreateDir(directory_path, true)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirFailureNonexistent) { - const auto path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_RAISES(IOError, fs_->DeleteDir(path)); +TEST_F(TestAzuriteFileSystem, DeleteDirUri) { + auto data = SetUpPreexistingData(); + ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" + data.container_name + "/")); } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirSuccessHaveBlob) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - const auto blob_path = internal::ConcatAbstractPath(directory_path, "hello.txt"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); - ASSERT_OK(output->Write(std::string_view("hello"))); - ASSERT_OK(output->Close()); - arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::File); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirSuccessHaveDirectory) { - const auto parent = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - const auto path = internal::ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); - ASSERT_OK(fs_->DeleteDir(parent)); - arrow::fs::AssertFileInfo(fs_.get(), path, 
FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); -} - -TEST_F(AzuriteFileSystemTest, DeleteDirUri) { - ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" + PreexistingContainerPath())); -} - -TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessContainer) { +TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessContainer) { #ifdef __APPLE__ GTEST_SKIP() << "This test fails by an Azurite problem: " "https://github.com/Azure/Azurite/pull/2302"; #endif + auto data = SetUpPreexistingData(); HierarchicalPaths paths; - CreateHierarchicalData(paths); + CreateHierarchicalData(&paths); ASSERT_OK(fs_->DeleteDirContents(paths.container)); arrow::fs::AssertFileInfo(fs_.get(), paths.container, FileType::Directory); arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::NotFound); @@ -948,13 +1091,14 @@ TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessContainer) { } } -TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessDirectory) { +TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessDirectory) { #ifdef __APPLE__ GTEST_SKIP() << "This test fails by an Azurite problem: " "https://github.com/Azure/Azurite/pull/2302"; #endif + auto data = SetUpPreexistingData(); HierarchicalPaths paths; - CreateHierarchicalData(paths); + CreateHierarchicalData(&paths); ASSERT_OK(fs_->DeleteDirContents(paths.directory)); // GH-38772: We may change this to FileType::Directory. arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::NotFound); @@ -963,98 +1107,72 @@ TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessDirectory) { } } -TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); -} - -TEST_F(AzuriteFileSystemTest, DeleteDirContentsFailureNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirContentsSuccessExist) { - HierarchicalPaths paths; - CreateHierarchicalData(paths); - ASSERT_OK(fs_->DeleteDirContents(paths.directory)); - arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::Directory); - for (const auto& sub_path : paths.sub_paths) { - arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound); - } -} - -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirContentsSuccessNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); +TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessNonexistent) { + this->TestDeleteDirContentsSuccessNonexistent(); } -TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirContentsFailureNonexistent) { - const auto directory_path = - internal::ConcatAbstractPath(PreexistingContainerName(), RandomDirectoryName()); - ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); +TEST_F(TestAzuriteFileSystem, DeleteDirContentsFailureNonexistent) { + this->TestDeleteDirContentsFailureNonexistent(); } -TEST_F(AzuriteFileSystemTest, CopyFileSuccessDestinationNonexistent) { - const auto destination_path = - 
internal::ConcatAbstractPath(PreexistingContainerName(), "copy-destionation"); - ASSERT_OK(fs_->CopyFile(PreexistingObjectPath(), destination_path)); +TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationNonexistent) { + auto data = SetUpPreexistingData(); + const auto destination_path = data.ContainerPath("copy-destionation"); + ASSERT_OK(fs_->CopyFile(data.ObjectPath(), destination_path)); ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(destination_path)); ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); - EXPECT_EQ(kLoremIpsum, buffer->ToString()); + EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString()); } -TEST_F(AzuriteFileSystemTest, CopyFileSuccessDestinationSame) { - ASSERT_OK(fs_->CopyFile(PreexistingObjectPath(), PreexistingObjectPath())); - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingObjectPath())); +TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationSame) { + auto data = SetUpPreexistingData(); + ASSERT_OK(fs_->CopyFile(data.ObjectPath(), data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); - EXPECT_EQ(kLoremIpsum, buffer->ToString()); + EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString()); } -TEST_F(AzuriteFileSystemTest, CopyFileFailureDestinationTrailingSlash) { - ASSERT_RAISES(IOError, - fs_->CopyFile(PreexistingObjectPath(), - internal::EnsureTrailingSlash(PreexistingObjectPath()))); +TEST_F(TestAzuriteFileSystem, CopyFileFailureDestinationTrailingSlash) { + auto data = SetUpPreexistingData(); + ASSERT_RAISES(IOError, fs_->CopyFile(data.ObjectPath(), + internal::EnsureTrailingSlash(data.ObjectPath()))); } -TEST_F(AzuriteFileSystemTest, CopyFileFailureSourceNonexistent) { - const auto destination_path = - internal::ConcatAbstractPath(PreexistingContainerName(), "copy-destionation"); - ASSERT_RAISES(IOError, fs_->CopyFile(NotFoundObjectPath(), destination_path)); +TEST_F(TestAzuriteFileSystem, CopyFileFailureSourceNonexistent) { + auto data = SetUpPreexistingData(); + const auto destination_path = data.ContainerPath("copy-destionation"); + ASSERT_RAISES(IOError, fs_->CopyFile(data.NotFoundObjectPath(), destination_path)); } -TEST_F(AzuriteFileSystemTest, CopyFileFailureDestinationParentNonexistent) { +TEST_F(TestAzuriteFileSystem, CopyFileFailureDestinationParentNonexistent) { + auto data = SetUpPreexistingData(); const auto destination_path = - internal::ConcatAbstractPath(RandomContainerName(), "copy-destionation"); - ASSERT_RAISES(IOError, fs_->CopyFile(PreexistingObjectPath(), destination_path)); + ConcatAbstractPath(PreexistingData::RandomContainerName(rng_), "copy-destionation"); + ASSERT_RAISES(IOError, fs_->CopyFile(data.ObjectPath(), destination_path)); } -TEST_F(AzuriteFileSystemTest, CopyFileUri) { - const auto destination_path = - internal::ConcatAbstractPath(PreexistingContainerName(), "copy-destionation"); - ASSERT_RAISES(Invalid, - fs_->CopyFile("abfs://" + PreexistingObjectPath(), destination_path)); - ASSERT_RAISES(Invalid, - fs_->CopyFile(PreexistingObjectPath(), "abfs://" + destination_path)); +TEST_F(TestAzuriteFileSystem, CopyFileUri) { + auto data = SetUpPreexistingData(); + const auto destination_path = data.ContainerPath("copy-destionation"); + ASSERT_RAISES(Invalid, fs_->CopyFile("abfs://" + data.ObjectPath(), destination_path)); + ASSERT_RAISES(Invalid, fs_->CopyFile(data.ObjectPath(), 
"abfs://" + destination_path)); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamString) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamString) { + auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); - EXPECT_EQ(buffer->ToString(), kLoremIpsum); + EXPECT_EQ(buffer->ToString(), PreexistingData::kLoremIpsum); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamStringBuffers) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamStringBuffers) { + auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); std::string contents; std::shared_ptr buffer; @@ -1063,23 +1181,25 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamStringBuffers) { contents.append(buffer->ToString()); } while (buffer && buffer->size() != 0); - EXPECT_EQ(contents, kLoremIpsum); + EXPECT_EQ(contents, PreexistingData::kLoremIpsum); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamInfo) { - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingObjectPath())); +TEST_F(TestAzuriteFileSystem, OpenInputStreamInfo) { + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); std::shared_ptr stream; ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); - EXPECT_EQ(buffer->ToString(), kLoremIpsum); + EXPECT_EQ(buffer->ToString(), PreexistingData::kLoremIpsum); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamEmpty) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamEmpty) { + auto data = SetUpPreexistingData(); const auto path_to_file = "empty-object.txt"; - const auto path = PreexistingContainerPath() + path_to_file; - blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) + const auto path = data.ContainerPath(path_to_file); + blob_service_client_->GetBlobContainerClient(data.container_name) .GetBlockBlobClient(path_to_file) .UploadFrom(nullptr, 0); @@ -1090,24 +1210,28 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamEmpty) { EXPECT_EQ(size, 0); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamNotFound) { - ASSERT_RAISES(IOError, fs_->OpenInputStream(NotFoundObjectPath())); +TEST_F(TestAzuriteFileSystem, OpenInputStreamNotFound) { + auto data = SetUpPreexistingData(); + ASSERT_RAISES(IOError, fs_->OpenInputStream(data.NotFoundObjectPath())); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamInfoInvalid) { - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingContainerPath())); +TEST_F(TestAzuriteFileSystem, OpenInputStreamInfoInvalid) { + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.container_name + "/")); ASSERT_RAISES(IOError, fs_->OpenInputStream(info)); - ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(NotFoundObjectPath())); + ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(data.NotFoundObjectPath())); ASSERT_RAISES(IOError, fs_->OpenInputStream(info2)); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamUri) { - ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + PreexistingObjectPath())); +TEST_F(TestAzuriteFileSystem, OpenInputStreamUri) { + auto data = SetUpPreexistingData(); + ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + data.ObjectPath())); } -TEST_F(AzuriteFileSystemTest, 
OpenInputStreamTrailingSlash) { - ASSERT_RAISES(IOError, fs_->OpenInputStream(PreexistingObjectPath() + '/')); +TEST_F(TestAzuriteFileSystem, OpenInputStreamTrailingSlash) { + auto data = SetUpPreexistingData(); + ASSERT_RAISES(IOError, fs_->OpenInputStream(data.ObjectPath() + '/')); } namespace { @@ -1145,9 +1269,10 @@ std::shared_ptr NormalizerKeyValueMetadata( } }; // namespace -TEST_F(AzuriteFileSystemTest, OpenInputStreamReadMetadata) { +TEST_F(TestAzuriteFileSystem, OpenInputStreamReadMetadata) { + auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); std::shared_ptr actual; ASSERT_OK_AND_ASSIGN(actual, stream->ReadMetadata()); @@ -1175,8 +1300,9 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamReadMetadata) { NormalizerKeyValueMetadata(actual)->ToString()); } -TEST_F(AzuriteFileSystemTest, OpenInputStreamClosed) { - ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(PreexistingObjectPath())); +TEST_F(TestAzuriteFileSystem, OpenInputStreamClosed) { + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(data.ObjectPath())); ASSERT_OK(stream->Close()); std::array buffer{}; ASSERT_RAISES(Invalid, stream->Read(buffer.size(), buffer.data())); @@ -1184,44 +1310,45 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamClosed) { ASSERT_RAISES(Invalid, stream->Tell()); } -TEST_F(AzuriteFileSystemTest, TestWriteMetadata) { +TEST_F(TestAzuriteFileSystem, WriteMetadata) { + auto data = SetUpPreexistingData(); options_.default_metadata = arrow::key_value_metadata({{"foo", "bar"}}); ASSERT_OK_AND_ASSIGN(auto fs_with_defaults, AzureFileSystem::Make(options_)); - std::string path = "object_with_defaults"; - auto location = PreexistingContainerPath() + path; + std::string blob_path = "object_with_defaults"; + auto full_path = data.ContainerPath(blob_path); ASSERT_OK_AND_ASSIGN(auto output, - fs_with_defaults->OpenOutputStream(location, /*metadata=*/{})); - const std::string_view expected(kLoremIpsum); + fs_with_defaults->OpenOutputStream(full_path, /*metadata=*/{})); + const std::string_view expected(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected)); ASSERT_OK(output->Close()); // Verify the metadata has been set. - auto blob_metadata = - blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) - .GetBlockBlobClient(path) - .GetProperties() - .Value.Metadata; - EXPECT_EQ(Azure::Core::CaseInsensitiveMap{std::make_pair("foo", "bar")}, blob_metadata); + auto blob_metadata = blob_service_client_->GetBlobContainerClient(data.container_name) + .GetBlockBlobClient(blob_path) + .GetProperties() + .Value.Metadata; + EXPECT_EQ(Core::CaseInsensitiveMap{std::make_pair("foo", "bar")}, blob_metadata); // Check that explicit metadata overrides the defaults. ASSERT_OK_AND_ASSIGN( output, fs_with_defaults->OpenOutputStream( - location, /*metadata=*/arrow::key_value_metadata({{"bar", "foo"}}))); + full_path, /*metadata=*/arrow::key_value_metadata({{"bar", "foo"}}))); ASSERT_OK(output->Write(expected)); ASSERT_OK(output->Close()); - blob_metadata = blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) - .GetBlockBlobClient(path) + blob_metadata = blob_service_client_->GetBlobContainerClient(data.container_name) + .GetBlockBlobClient(blob_path) .GetProperties() .Value.Metadata; // Defaults are overwritten and not merged. 
- EXPECT_EQ(Azure::Core::CaseInsensitiveMap{std::make_pair("bar", "foo")}, blob_metadata); + EXPECT_EQ(Core::CaseInsensitiveMap{std::make_pair("bar", "foo")}, blob_metadata); } -TEST_F(AzuriteFileSystemTest, OpenOutputStreamSmall) { - const auto path = PreexistingContainerPath() + "test-write-object"; +TEST_F(TestAzuriteFileSystem, OpenOutputStreamSmall) { + auto data = SetUpPreexistingData(); + const auto path = data.ContainerPath("test-write-object"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); - const std::string_view expected(kLoremIpsum); + const std::string_view expected(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected)); ASSERT_OK(output->Close()); @@ -1234,8 +1361,9 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamSmall) { EXPECT_EQ(expected, std::string_view(inbuf.data(), size)); } -TEST_F(AzuriteFileSystemTest, OpenOutputStreamLarge) { - const auto path = PreexistingContainerPath() + "test-write-object"; +TEST_F(TestAzuriteFileSystem, OpenOutputStreamLarge) { + auto data = SetUpPreexistingData(); + const auto path = data.ContainerPath("test-write-object"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); std::array sizes{257 * 1024, 258 * 1024, 259 * 1024}; std::array buffers{ @@ -1265,8 +1393,9 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamLarge) { EXPECT_EQ(contents, buffers[0] + buffers[1] + buffers[2]); } -TEST_F(AzuriteFileSystemTest, OpenOutputStreamTruncatesExistingFile) { - const auto path = PreexistingContainerPath() + "test-write-object"; +TEST_F(TestAzuriteFileSystem, OpenOutputStreamTruncatesExistingFile) { + auto data = SetUpPreexistingData(); + const auto path = data.ContainerPath("test-write-object"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); const std::string_view expected0("Existing blob content"); ASSERT_OK(output->Write(expected0)); @@ -1281,7 +1410,7 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamTruncatesExistingFile) { EXPECT_EQ(expected0, std::string_view(inbuf.data(), size)); ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(path, {})); - const std::string_view expected1(kLoremIpsum); + const std::string_view expected1(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected1)); ASSERT_OK(output->Close()); @@ -1291,8 +1420,9 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamTruncatesExistingFile) { EXPECT_EQ(expected1, std::string_view(inbuf.data(), size)); } -TEST_F(AzuriteFileSystemTest, OpenAppendStreamDoesNotTruncateExistingFile) { - const auto path = PreexistingContainerPath() + "test-write-object"; +TEST_F(TestAzuriteFileSystem, OpenAppendStreamDoesNotTruncateExistingFile) { + auto data = SetUpPreexistingData(); + const auto path = data.ContainerPath("test-write-object"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); const std::string_view expected0("Existing blob content"); ASSERT_OK(output->Write(expected0)); @@ -1307,7 +1437,7 @@ TEST_F(AzuriteFileSystemTest, OpenAppendStreamDoesNotTruncateExistingFile) { EXPECT_EQ(expected0, std::string_view(inbuf.data())); ASSERT_OK_AND_ASSIGN(output, fs_->OpenAppendStream(path, {})); - const std::string_view expected1(kLoremIpsum); + const std::string_view expected1(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected1)); ASSERT_OK(output->Close()); @@ -1319,35 +1449,37 @@ TEST_F(AzuriteFileSystemTest, OpenAppendStreamDoesNotTruncateExistingFile) { std::string(expected0) + std::string(expected1)); } -TEST_F(AzuriteFileSystemTest, OpenOutputStreamClosed) { - const auto path = 
internal::ConcatAbstractPath(PreexistingContainerName(), - "open-output-stream-closed.txt"); +TEST_F(TestAzuriteFileSystem, OpenOutputStreamClosed) { + auto data = SetUpPreexistingData(); + const auto path = data.ContainerPath("open-output-stream-closed.txt"); ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); ASSERT_OK(output->Close()); - ASSERT_RAISES(Invalid, output->Write(kLoremIpsum, std::strlen(kLoremIpsum))); + ASSERT_RAISES(Invalid, output->Write(PreexistingData::kLoremIpsum, + std::strlen(PreexistingData::kLoremIpsum))); ASSERT_RAISES(Invalid, output->Flush()); ASSERT_RAISES(Invalid, output->Tell()); } -TEST_F(AzuriteFileSystemTest, OpenOutputStreamUri) { - const auto path = internal::ConcatAbstractPath(PreexistingContainerName(), - "open-output-stream-uri.txt"); +TEST_F(TestAzuriteFileSystem, OpenOutputStreamUri) { + auto data = SetUpPreexistingData(); + const auto path = data.ContainerPath("open-output-stream-uri.txt"); ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + path)); } -TEST_F(AzuriteFileSystemTest, OpenInputFileMixedReadVsReadAt) { +TEST_F(TestAzuriteFileSystem, OpenInputFileMixedReadVsReadAt) { + auto data = SetUpPreexistingData(); // Create a file large enough to make the random access tests non-trivial. auto constexpr kLineWidth = 100; auto constexpr kLineCount = 4096; std::vector lines(kLineCount); int lineno = 0; - std::generate_n(lines.begin(), lines.size(), - [&] { return RandomLine(++lineno, kLineWidth); }); + std::generate_n(lines.begin(), lines.size(), [&] { + return PreexistingData::RandomLine(++lineno, kLineWidth, rng_); + }); - const auto path_to_file = "OpenInputFileMixedReadVsReadAt/object-name"; - const auto path = PreexistingContainerPath() + path_to_file; + const auto path = data.ContainerPath("OpenInputFileMixedReadVsReadAt/object-name"); - UploadLines(lines, path_to_file, kLineCount * kLineWidth); + UploadLines(lines, path, kLineCount * kLineWidth); std::shared_ptr file; ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(path)); @@ -1368,7 +1500,7 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileMixedReadVsReadAt) { } // Verify random reads interleave too. - auto const index = RandomIndex(kLineCount); + auto const index = PreexistingData::RandomIndex(kLineCount, rng_); auto const position = index * kLineWidth; ASSERT_OK_AND_ASSIGN(size, file->ReadAt(position, buffer.size(), buffer.data())); EXPECT_EQ(size, kLineWidth); @@ -1381,27 +1513,28 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileMixedReadVsReadAt) { } } -TEST_F(AzuriteFileSystemTest, OpenInputFileRandomSeek) { +TEST_F(TestAzuriteFileSystem, OpenInputFileRandomSeek) { + auto data = SetUpPreexistingData(); // Create a file large enough to make the random access tests non-trivial. 
auto constexpr kLineWidth = 100; auto constexpr kLineCount = 4096; std::vector lines(kLineCount); int lineno = 0; - std::generate_n(lines.begin(), lines.size(), - [&] { return RandomLine(++lineno, kLineWidth); }); + std::generate_n(lines.begin(), lines.size(), [&] { + return PreexistingData::RandomLine(++lineno, kLineWidth, rng_); + }); - const auto path_to_file = "OpenInputFileRandomSeek/object-name"; - const auto path = PreexistingContainerPath() + path_to_file; + const auto path = data.ContainerPath("OpenInputFileRandomSeek/object-name"); std::shared_ptr output; - UploadLines(lines, path_to_file, kLineCount * kLineWidth); + UploadLines(lines, path, kLineCount * kLineWidth); std::shared_ptr file; ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(path)); for (int i = 0; i != 32; ++i) { SCOPED_TRACE("Iteration " + std::to_string(i)); // Verify sequential reads work as expected. - auto const index = RandomIndex(kLineCount); + auto const index = PreexistingData::RandomIndex(kLineCount, rng_); auto const position = index * kLineWidth; ASSERT_OK(file->Seek(position)); ASSERT_OK_AND_ASSIGN(auto actual, file->Read(kLineWidth)); @@ -1409,15 +1542,15 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileRandomSeek) { } } -TEST_F(AzuriteFileSystemTest, OpenInputFileIoContext) { +TEST_F(TestAzuriteFileSystem, OpenInputFileIoContext) { + auto data = SetUpPreexistingData(); // Create a test file. - const auto path_to_file = "OpenInputFileIoContext/object-name"; - const auto path = PreexistingContainerPath() + path_to_file; + const auto blob_path = "OpenInputFileIoContext/object-name"; + const auto path = data.ContainerPath(blob_path); const std::string contents = "The quick brown fox jumps over the lazy dog"; - auto blob_client = - blob_service_client_->GetBlobContainerClient(PreexistingContainerName()) - .GetBlockBlobClient(path_to_file); + auto blob_client = blob_service_client_->GetBlobContainerClient(data.container_name) + .GetBlockBlobClient(blob_path); blob_client.UploadFrom(reinterpret_cast(contents.data()), contents.length()); @@ -1426,8 +1559,9 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileIoContext) { EXPECT_EQ(fs_->io_context().external_id(), file->io_context().external_id()); } -TEST_F(AzuriteFileSystemTest, OpenInputFileInfo) { - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingObjectPath())); +TEST_F(TestAzuriteFileSystem, OpenInputFileInfo) { + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); std::shared_ptr file; ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(info)); @@ -1437,24 +1571,27 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileInfo) { auto constexpr kStart = 16; ASSERT_OK_AND_ASSIGN(size, file->ReadAt(kStart, buffer.size(), buffer.data())); - auto const expected = std::string(kLoremIpsum).substr(kStart); + auto const expected = std::string(PreexistingData::kLoremIpsum).substr(kStart); EXPECT_EQ(std::string(buffer.data(), size), expected); } -TEST_F(AzuriteFileSystemTest, OpenInputFileNotFound) { - ASSERT_RAISES(IOError, fs_->OpenInputFile(NotFoundObjectPath())); +TEST_F(TestAzuriteFileSystem, OpenInputFileNotFound) { + auto data = SetUpPreexistingData(); + ASSERT_RAISES(IOError, fs_->OpenInputFile(data.NotFoundObjectPath())); } -TEST_F(AzuriteFileSystemTest, OpenInputFileInfoInvalid) { - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingContainerPath())); +TEST_F(TestAzuriteFileSystem, OpenInputFileInfoInvalid) { + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto info, 
fs_->GetFileInfo(data.container_name)); ASSERT_RAISES(IOError, fs_->OpenInputFile(info)); - ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(NotFoundObjectPath())); + ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(data.NotFoundObjectPath())); ASSERT_RAISES(IOError, fs_->OpenInputFile(info2)); } -TEST_F(AzuriteFileSystemTest, OpenInputFileClosed) { - ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputFile(PreexistingObjectPath())); +TEST_F(TestAzuriteFileSystem, OpenInputFileClosed) { + auto data = SetUpPreexistingData(); + ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputFile(data.ObjectPath())); ASSERT_OK(stream->Close()); std::array buffer{}; ASSERT_RAISES(Invalid, stream->Tell()); From 431c4ea4d9facb23c612631317a2e1f862087ba7 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Thu, 14 Dec 2023 12:08:55 -0800 Subject: [PATCH 048/570] GH-39223: [C#] Support IReadOnlyList on remaining scalar types (#39224) ### What changes are included in this PR? Decimal128Array implements IReadOnlyList and IReadOnlyList. Decimal256Array implements IReadOnlyList, IReadOnlyList and IReadOnlyList. FixedLengthBinaryArray implements IReadOnlyList. DurationArray implements IReadOnlyList. Also removes #ifs which are no longer relevant now that netstandard13 isn't being built any more. ### Are these changes tested? Yes. * Closes: #39223 Authored-by: Curt Hagenlocher Signed-off-by: Curt Hagenlocher --- .../Apache.Arrow/Arrays/Decimal128Array.cs | 23 ++++++---- .../Apache.Arrow/Arrays/Decimal256Array.cs | 44 ++++++++++++++++--- .../src/Apache.Arrow/Arrays/DurationArray.cs | 17 ++++++- .../Arrays/FixedSizeBinaryArray.cs | 17 ++++++- csharp/src/Apache.Arrow/DecimalUtility.cs | 6 --- .../Decimal128ArrayTests.cs | 25 +++-------- .../Decimal256ArrayTests.cs | 36 +++++++-------- .../Apache.Arrow.Tests/DecimalUtilityTests.cs | 5 --- .../Apache.Arrow.Tests/DurationArrayTests.cs | 4 ++ 9 files changed, 112 insertions(+), 65 deletions(-) diff --git a/csharp/src/Apache.Arrow/Arrays/Decimal128Array.cs b/csharp/src/Apache.Arrow/Arrays/Decimal128Array.cs index 0e3ec56740449..5a51175b7c4da 100644 --- a/csharp/src/Apache.Arrow/Arrays/Decimal128Array.cs +++ b/csharp/src/Apache.Arrow/Arrays/Decimal128Array.cs @@ -14,18 +14,16 @@ // limitations under the License. using System; +using System.Collections; using System.Collections.Generic; -#if !NETSTANDARD1_3 using System.Data.SqlTypes; -#endif using System.Diagnostics; -using System.Numerics; using Apache.Arrow.Arrays; using Apache.Arrow.Types; namespace Apache.Arrow { - public class Decimal128Array : FixedSizeBinaryArray + public class Decimal128Array : FixedSizeBinaryArray, IReadOnlyList { public class Builder : BuilderBase { @@ -95,7 +93,6 @@ public Builder AppendRange(IEnumerable values) return Instance; } -#if !NETSTANDARD1_3 public Builder Append(SqlDecimal value) { Span bytes = stackalloc byte[DataType.ByteWidth]; @@ -118,7 +115,6 @@ public Builder AppendRange(IEnumerable values) return Instance; } -#endif public Builder Set(int index, decimal value) { @@ -184,7 +180,6 @@ public string GetString(int index) return DecimalUtility.GetString(ValueBuffer, index, Precision, Scale, ByteWidth); } -#if !NETSTANDARD1_3 public SqlDecimal? GetSqlDecimal(int index) { if (IsNull(index)) @@ -194,6 +189,18 @@ public string GetString(int index) return DecimalUtility.GetSqlDecimal128(ValueBuffer, index, Precision, Scale); } -#endif + + int IReadOnlyCollection.Count => Length; + SqlDecimal? 
IReadOnlyList.this[int index] => GetSqlDecimal(index); + + IEnumerator IEnumerable.GetEnumerator() + { + for (int index = 0; index < Length; index++) + { + yield return GetSqlDecimal(index); + } + } + + IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); } } diff --git a/csharp/src/Apache.Arrow/Arrays/Decimal256Array.cs b/csharp/src/Apache.Arrow/Arrays/Decimal256Array.cs index 94a47f258280e..eca2611b6f3bb 100644 --- a/csharp/src/Apache.Arrow/Arrays/Decimal256Array.cs +++ b/csharp/src/Apache.Arrow/Arrays/Decimal256Array.cs @@ -14,17 +14,16 @@ // limitations under the License. using System; +using System.Collections; using System.Collections.Generic; -#if !NETSTANDARD1_3 using System.Data.SqlTypes; -#endif using System.Diagnostics; using Apache.Arrow.Arrays; using Apache.Arrow.Types; namespace Apache.Arrow { - public class Decimal256Array : FixedSizeBinaryArray + public class Decimal256Array : FixedSizeBinaryArray, IReadOnlyList, IReadOnlyList { public class Builder : BuilderBase { @@ -94,7 +93,6 @@ public Builder AppendRange(IEnumerable values) return Instance; } -#if !NETSTANDARD1_3 public Builder Append(SqlDecimal value) { Span bytes = stackalloc byte[DataType.ByteWidth]; @@ -123,7 +121,6 @@ public Builder AppendRange(IEnumerable values) return Instance; } -#endif public Builder Set(int index, decimal value) { @@ -190,7 +187,6 @@ public string GetString(int index) return DecimalUtility.GetString(ValueBuffer, index, Precision, Scale, ByteWidth); } -#if !NETSTANDARD1_3 public bool TryGetSqlDecimal(int index, out SqlDecimal? value) { if (IsNull(index)) @@ -211,6 +207,40 @@ public bool TryGetSqlDecimal(int index, out SqlDecimal? value) value = null; return false; } -#endif + + private SqlDecimal? GetSqlDecimal(int index) + { + SqlDecimal? value; + if (TryGetSqlDecimal(index, out value)) + { + return value; + } + + throw new OverflowException("decimal256 value out of range of SqlDecimal"); + } + + int IReadOnlyCollection.Count => Length; + SqlDecimal? IReadOnlyList.this[int index] => GetSqlDecimal(index); + + IEnumerator IEnumerable.GetEnumerator() + { + for (int index = 0; index < Length; index++) + { + yield return GetSqlDecimal(index); + } + } + + int IReadOnlyCollection.Count => Length; + string? IReadOnlyList.this[int index] => GetString(index); + + IEnumerator IEnumerable.GetEnumerator() + { + for (int index = 0; index < Length; index++) + { + yield return GetString(index); + } + } + + IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); } } diff --git a/csharp/src/Apache.Arrow/Arrays/DurationArray.cs b/csharp/src/Apache.Arrow/Arrays/DurationArray.cs index 3649dda50cd97..f725a71e377ab 100644 --- a/csharp/src/Apache.Arrow/Arrays/DurationArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/DurationArray.cs @@ -14,11 +14,13 @@ // limitations under the License. using System; +using System.Collections; +using System.Collections.Generic; using Apache.Arrow.Types; namespace Apache.Arrow { - public class DurationArray : PrimitiveArray + public class DurationArray : PrimitiveArray, IReadOnlyList { public class Builder : PrimitiveArrayBuilder { @@ -80,5 +82,18 @@ public DurationArray(ArrayData data) } public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); + + int IReadOnlyCollection.Count => Length; + TimeSpan? 
IReadOnlyList.this[int index] => GetTimeSpan(index); + + IEnumerator IEnumerable.GetEnumerator() + { + for (int index = 0; index < Length; index++) + { + yield return GetTimeSpan(index); + } + } + + IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); } } diff --git a/csharp/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs b/csharp/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs index 866a674bc9df8..0fa7954724f38 100644 --- a/csharp/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs @@ -14,13 +14,14 @@ // limitations under the License. using System; +using System.Collections; using System.Collections.Generic; using Apache.Arrow.Memory; using Apache.Arrow.Types; namespace Apache.Arrow.Arrays { - public class FixedSizeBinaryArray : Array + public class FixedSizeBinaryArray : Array, IReadOnlyList { public FixedSizeBinaryArray(ArrayData data) : base(data) @@ -70,6 +71,19 @@ public ReadOnlySpan GetBytes(int index) return ValueBuffer.Span.Slice(index * size, size); } + int IReadOnlyCollection.Count => Length; + byte[] IReadOnlyList.this[int index] => GetBytes(index).ToArray(); + + IEnumerator IEnumerable.GetEnumerator() + { + for (int index = 0; index < Length; index++) + { + yield return GetBytes(index).ToArray(); + } + } + + IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); + public abstract class BuilderBase : IArrowArrayBuilder where TArray : IArrowArray where TBuilder : class, IArrowArrayBuilder @@ -220,7 +234,6 @@ public TBuilder SetNull(int index) ValidityBuffer.Set(index, false); return Instance; } - } } } diff --git a/csharp/src/Apache.Arrow/DecimalUtility.cs b/csharp/src/Apache.Arrow/DecimalUtility.cs index bb3f0834fcec3..e2ab18d479edb 100644 --- a/csharp/src/Apache.Arrow/DecimalUtility.cs +++ b/csharp/src/Apache.Arrow/DecimalUtility.cs @@ -14,9 +14,7 @@ // limitations under the License. 
using System; -#if !NETSTANDARD1_3 using System.Data.SqlTypes; -#endif using System.Numerics; namespace Apache.Arrow @@ -183,7 +181,6 @@ internal unsafe static string GetString(in ArrowBuffer valueBuffer, int index, i } #endif -#if !NETSTANDARD1_3 internal static SqlDecimal GetSqlDecimal128(in ArrowBuffer valueBuffer, int index, int precision, int scale) { const int byteWidth = 16; @@ -207,7 +204,6 @@ internal static SqlDecimal GetSqlDecimal128(in ArrowBuffer valueBuffer, int inde return new SqlDecimal((byte)precision, (byte)scale, false, (int)(data1 & 0xffffffff), (int)(data1 >> 32), (int)(data2 & 0xffffffff), (int)(data2 >> 32)); } } -#endif private static decimal DivideByScale(BigInteger integerValue, int scale) { @@ -428,7 +424,6 @@ internal static void GetBytes(string value, int precision, int scale, int byteWi } } -#if !NETSTANDARD1_3 internal static void GetBytes(SqlDecimal value, int precision, int scale, Span bytes) { if (value.Precision != precision || value.Scale != scale) @@ -446,6 +441,5 @@ internal static void GetBytes(SqlDecimal value, int precision, int scale, Span asList = array; + for (int i = 0; i < asList.Count; i++) + { + Assert.Equal(testData[i], asList[i]); + } } [Fact] @@ -467,7 +457,6 @@ public void AppendRangeSqlDecimal() Assert.Null(array.GetValue(range.Length)); } } -#endif } } } diff --git a/csharp/test/Apache.Arrow.Tests/Decimal256ArrayTests.cs b/csharp/test/Apache.Arrow.Tests/Decimal256ArrayTests.cs index 3924c73a4e2f7..baeb7ee5419b9 100644 --- a/csharp/test/Apache.Arrow.Tests/Decimal256ArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/Decimal256ArrayTests.cs @@ -14,10 +14,9 @@ // limitations under the License. using System; -#if !NETSTANDARD1_3 +using System.Collections.Generic; using System.Data.SqlTypes; using System.Linq; -#endif using Apache.Arrow.Types; using Xunit; @@ -25,7 +24,6 @@ namespace Apache.Arrow.Tests { public class Decimal256ArrayTests { -#if !NETSTANDARD1_3 static SqlDecimal? GetSqlDecimal(Decimal256Array array, int index) { SqlDecimal? result; @@ -42,7 +40,11 @@ public class Decimal256ArrayTests { return value == null ? null : value.Value.Value; } -#endif + + static decimal? Convert(string value) + { + return value == null ? 
null : decimal.Parse(value); + } public class Builder { @@ -68,11 +70,9 @@ public void AppendThenGetGivesNull() Assert.Null(array.GetValue(1)); Assert.Null(array.GetValue(2)); -#if !NETSTANDARD1_3 Assert.Null(GetSqlDecimal(array, 0)); Assert.Null(GetSqlDecimal(array, 1)); Assert.Null(GetSqlDecimal(array, 2)); -#endif } } @@ -106,9 +106,7 @@ public void AppendDecimal(int count) for (int i = 0; i < count; i++) { Assert.Equal(testData[i], array.GetValue(i)); -#if !NETSTANDARD1_3 Assert.Equal(Convert(testData[i]), GetSqlDecimal(array, i)); -#endif } } @@ -127,10 +125,8 @@ public void AppendLargeDecimal() Assert.Equal(large, array.GetValue(0)); Assert.Equal(-large, array.GetValue(1)); -#if !NETSTANDARD1_3 Assert.Equal(Convert(large), GetSqlDecimal(array, 0)); Assert.Equal(Convert(-large), GetSqlDecimal(array, 1)); -#endif } [Fact] @@ -152,12 +148,10 @@ public void AppendMaxAndMinDecimal() Assert.Equal(Decimal.MaxValue - 10, array.GetValue(2)); Assert.Equal(Decimal.MinValue + 10, array.GetValue(3)); -#if !NETSTANDARD1_3 Assert.Equal(Convert(Decimal.MaxValue), GetSqlDecimal(array, 0)); Assert.Equal(Convert(Decimal.MinValue), GetSqlDecimal(array, 1)); Assert.Equal(Convert(Decimal.MaxValue) - 10, GetSqlDecimal(array, 2)); Assert.Equal(Convert(Decimal.MinValue) + 10, GetSqlDecimal(array, 3)); -#endif } [Fact] @@ -175,10 +169,8 @@ public void AppendFractionalDecimal() Assert.Equal(fraction, array.GetValue(0)); Assert.Equal(-fraction, array.GetValue(1)); -#if !NETSTANDARD1_3 Assert.Equal(Convert(fraction), GetSqlDecimal(array, 0)); Assert.Equal(Convert(-fraction), GetSqlDecimal(array, 1)); -#endif } [Fact] @@ -197,9 +189,7 @@ public void AppendRangeDecimal() for(int i = 0; i < range.Length; i ++) { Assert.Equal(range[i], array.GetValue(i)); -#if !NETSTANDARD1_3 Assert.Equal(Convert(range[i]), GetSqlDecimal(array, i)); -#endif } Assert.Null( array.GetValue(range.Length)); @@ -308,7 +298,6 @@ public void SwapNull() } } -#if !NETSTANDARD1_3 public class SqlDecimals { [Theory] @@ -342,6 +331,18 @@ public void AppendSqlDecimal(int count) Assert.Equal(testData[i], GetSqlDecimal(array, i)); Assert.Equal(Convert(testData[i]), array.GetValue(i)); } + + IReadOnlyList asDecimalList = array; + for (int i = 0; i < asDecimalList.Count; i++) + { + Assert.Equal(testData[i], asDecimalList[i]); + } + + IReadOnlyList asStringList = array; + for (int i = 0; i < asStringList.Count; i++) + { + Assert.Equal(Convert(testData[i]?.ToString()), Convert(asStringList[i])); + } } [Fact] @@ -474,7 +475,6 @@ public void AppendRangeSqlDecimal() Assert.Null(array.GetValue(range.Length)); } } -#endif } } } diff --git a/csharp/test/Apache.Arrow.Tests/DecimalUtilityTests.cs b/csharp/test/Apache.Arrow.Tests/DecimalUtilityTests.cs index 677e9b6cadfcf..1156ecb452c94 100644 --- a/csharp/test/Apache.Arrow.Tests/DecimalUtilityTests.cs +++ b/csharp/test/Apache.Arrow.Tests/DecimalUtilityTests.cs @@ -14,9 +14,7 @@ // limitations under the License. 
using System; -#if !NETSTANDARD1_3 using System.Data.SqlTypes; -#endif using Apache.Arrow.Types; using Xunit; @@ -72,8 +70,6 @@ public void Decimal256HasExpectedResultOrThrows(decimal d, int precision, int sc public class SqlDecimals { - -#if !NETSTANDARD1_3 [Fact] public void NegativeSqlDecimal() { @@ -119,7 +115,6 @@ public void LargeScale() Assert.Equal(negative, sqlNegative); Assert.Equal(digits, sqlNegative.ToString()); } -#endif } public class Strings diff --git a/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs b/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs index 3395ca7bc9ad7..59080d739b10b 100644 --- a/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs @@ -113,6 +113,10 @@ public void AppendTimeSpanGivesSameTimeSpan(TimeSpan? timeSpan, DurationType typ var array = builder.Build(); Assert.Equal(1, array.Length); Assert.Equal(timeSpan, array.GetTimeSpan(0)); + + IReadOnlyList asList = array; + Assert.Equal(1, asList.Count); + Assert.Equal(timeSpan, asList[0]); } } From 75c6b642b5ff1ed171bc1d1a758a70098539c48e Mon Sep 17 00:00:00 2001 From: Miguel Pragier Date: Fri, 15 Dec 2023 20:03:18 +0100 Subject: [PATCH 049/570] GH-39238:[Go] PATCH Prevents empty record to be appended to empty resultset (#39239) ### Rationale for this change When having an empty resultset, the driver tries to include an empty record referece, that cannot be scanned. So, any operation that relies on the returned Row(s) will trigger a "Index out of Range" error. ### What changes are included in this PR? We're preventing to include an invalid record (that can't be scanned) in an empty resultset ### Are these changes tested? Yes, there's a new test included ### Are there any user-facing changes? No **This PR contains a "Critical Fix".** * Closes: #39238 Authored-by: miguel pragier Signed-off-by: Matt Topol --- go/arrow/flight/flightsql/driver/driver.go | 7 +-- .../flight/flightsql/driver/driver_test.go | 44 +++++++++++++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/go/arrow/flight/flightsql/driver/driver.go b/go/arrow/flight/flightsql/driver/driver.go index e31e572586557..f74bfa378a303 100644 --- a/go/arrow/flight/flightsql/driver/driver.go +++ b/go/arrow/flight/flightsql/driver/driver.go @@ -487,9 +487,10 @@ func readEndpoint(ctx context.Context, client *flightsql.Client, endpoint *fligh schema := reader.Schema() var records []arrow.Record for reader.Next() { - record := reader.Record() - record.Retain() - records = append(records, record) + if record := reader.Record(); record.NumRows() > 0 { + record.Retain() + records = append(records, record) + } } if err := reader.Err(); err != nil && !errors.Is(err, io.EOF) { diff --git a/go/arrow/flight/flightsql/driver/driver_test.go b/go/arrow/flight/flightsql/driver/driver_test.go index a388bf155ec99..24eb5ee6812c0 100644 --- a/go/arrow/flight/flightsql/driver/driver_test.go +++ b/go/arrow/flight/flightsql/driver/driver_test.go @@ -273,6 +273,50 @@ func (s *SqlTestSuite) TestQuery() { wg.Wait() } +func (s *SqlTestSuite) TestQueryWithEmptyResultset() { + t := s.T() + + // Create and start the server + server, addr, err := s.createServer() + require.NoError(t, err) + + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + require.NoError(s.T(), s.startServer(server)) + }() + defer s.stopServer(server) + time.Sleep(100 * time.Millisecond) + + // Configure client + cfg := s.Config + cfg.Address = addr + db, err := sql.Open("flightsql", cfg.DSN()) + require.NoError(t, 
err) + defer db.Close() + + // Create the table + _, err = db.Exec(fmt.Sprintf(s.Statements["create table"], s.TableName)) + require.NoError(t, err) + + rows, err := db.Query(fmt.Sprintf(s.Statements["query"], s.TableName)) + require.NoError(t, err) + require.False(t, rows.Next()) + + row := db.QueryRow(fmt.Sprintf(s.Statements["query"], s.TableName)) + require.NotNil(t, row) + require.NoError(t, row.Err()) + + target := make(map[string]any) + err = row.Scan(&target) + require.ErrorIs(t, err, sql.ErrNoRows) + + // Tear-down server + s.stopServer(server) + wg.Wait() +} + func (s *SqlTestSuite) TestPreparedQuery() { t := s.T() From 132b1f71ab9a4993557fb79e89824418e5e3618b Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 16 Dec 2023 23:52:43 +0900 Subject: [PATCH 050/570] GH-39246: [CI][GLib][Ruby] Use Ubuntu 22.04 not 20.04 (#39247) ### Rationale for this change Ubuntu 20.04 ships Ruby 2.7, which has reached EOL. Bundler 2.5.0 or later requires Ruby 3.0 or later. ### What changes are included in this PR? Use Ubuntu 22.04, which ships Ruby 3.0. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * Closes: #39246 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- .github/workflows/ruby.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index 25edec62e06eb..be30865ac7ac6 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -66,7 +66,7 @@ jobs: fail-fast: false matrix: ubuntu: - - 20.04 + - 22.04 env: UBUNTU: ${{ matrix.ubuntu }} steps: From 49fde2313bc429547bc7e13886ed28c9c7fc6a84 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Sat, 16 Dec 2023 11:10:04 -0500 Subject: [PATCH 051/570] GH-15060: [JS] Add LargeUtf8 type (#35780) This pull request adds support for the LargeUtf8 type in Arrow. Now we can create, decode, and encode these vectors. However, while the offset vectors support 64-bit integers, the value buffers are still limited to 32-bit lengths, meaning that LargeUtf8 vectors cannot yet be larger than Utf8 vectors. We will see how we can address this limitation in a follow-up pull request. The issue is that JS typed arrays can be at most 2**31-1 elements long (implementation-defined). This pull request also fixes a bug in a rounding method that prevented us from supporting large vectors, so it's already a big step forward. Fixes #15060.
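For illustration only (this snippet is not part of the patch): building and reading a LargeUtf8 vector is expected to mirror the existing Utf8 path. A minimal sketch, assuming the `makeBuilder` factory exported by `apache-arrow` accepts the new `LargeUtf8` type the same way it accepts `Utf8`:

```ts
import { makeBuilder, LargeUtf8 } from 'apache-arrow';

// Build a LargeUtf8 vector the same way a Utf8 vector is built; per this
// change, only the offsets buffer differs (BigInt64Array instead of Int32Array).
const builder = makeBuilder({ type: new LargeUtf8(), nullValues: [null] });
for (const value of ['foo', null, 'bar']) {
    builder.append(value);
}
const vector = builder.finish().toVector();

console.log(vector.get(0));    // 'foo'
console.log(vector.nullCount); // 1
// Expected to be a BigInt64Array, since LargeUtf8 uses 64-bit value offsets.
console.log(vector.data[0].valueOffsets.constructor.name);
```

Because the value buffer is still indexed with 32-bit lengths (see above), this behaves like the Utf8 builder in practice for now; the 64-bit offsets mainly prepare the format for larger data later.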
* Closes: #15060 --------- Co-authored-by: Kyle Barron --- docs/source/status.rst | 2 +- js/src/Arrow.dom.ts | 4 +- js/src/Arrow.ts | 3 +- js/src/builder.ts | 27 +++++----- js/src/builder/buffer.ts | 52 ++++++++---------- js/src/builder/largeutf8.ts | 59 ++++++++++++++++++++ js/src/builder/list.ts | 4 +- js/src/data.ts | 18 +++++-- js/src/enum.ts | 6 +-- js/src/interfaces.ts | 22 +++++--- js/src/ipc/metadata/json.ts | 3 +- js/src/ipc/metadata/message.ts | 3 +- js/src/type.ts | 35 +++++++++--- js/src/util/buffer.ts | 10 ++-- js/src/visitor.ts | 4 ++ js/src/visitor/builderctor.ts | 2 + js/src/visitor/bytelength.ts | 3 +- js/src/visitor/get.ts | 19 ++++++- js/src/visitor/indexof.ts | 4 +- js/src/visitor/iterator.ts | 4 +- js/src/visitor/jsontypeassembler.ts | 5 +- js/src/visitor/jsonvectorassembler.ts | 8 ++- js/src/visitor/set.ts | 23 ++++++-- js/src/visitor/typeassembler.ts | 5 ++ js/src/visitor/typecomparator.ts | 4 +- js/src/visitor/typector.ts | 1 + js/src/visitor/vectorassembler.ts | 26 ++++++++- js/src/visitor/vectorloader.ts | 7 ++- js/test/data/tables.ts | 2 +- js/test/generate-test-data.ts | 60 +++++++++++++++++---- js/test/unit/builders/builder-tests.ts | 1 + js/test/unit/builders/largeUtf8-tests.ts | 65 +++++++++++++++++++++++ js/test/unit/generated-data-tests.ts | 1 + js/test/unit/generated-data-validators.ts | 20 +++++-- js/test/unit/vector/vector-tests.ts | 24 ++++++++- js/test/unit/visitor-tests.ts | 6 ++- 36 files changed, 432 insertions(+), 110 deletions(-) create mode 100644 js/src/builder/largeutf8.ts create mode 100644 js/test/unit/builders/largeUtf8-tests.ts diff --git a/docs/source/status.rst b/docs/source/status.rst index b8ee7eedbf284..e52e4e4cd49bc 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -66,7 +66,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Utf8 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| Large Utf8 | ✓ | ✓ | ✓ | | | ✓ | ✓ | | +| Large Utf8 | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Binary View | ✓ | | ✓ | | | | | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ diff --git a/js/src/Arrow.dom.ts b/js/src/Arrow.dom.ts index 451bf6acb6186..9ec76fdd009f3 100644 --- a/js/src/Arrow.dom.ts +++ b/js/src/Arrow.dom.ts @@ -47,7 +47,7 @@ export { Bool, Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64, Float, Float16, Float32, Float64, - Utf8, + Utf8, LargeUtf8, Binary, FixedSizeBinary, Date_, DateDay, DateMillisecond, @@ -96,5 +96,5 @@ export { TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder, TimestampMicrosecondBuilder, TimestampNanosecondBuilder, TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder, UnionBuilder, DenseUnionBuilder, SparseUnionBuilder, - Utf8Builder, + Utf8Builder, LargeUtf8Builder } from './Arrow.js'; diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts index 714861e764ccb..b7e5f63a6ab5a 100644 --- a/js/src/Arrow.ts +++ b/js/src/Arrow.ts @@ -36,7 +36,7 @@ export { Bool, Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64, Float, Float16, Float32, Float64, - Utf8, + Utf8, LargeUtf8, Binary, FixedSizeBinary, Date_, DateDay, DateMillisecond, @@ -78,6 +78,7 @@ export { TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder, export { IntervalBuilder, 
IntervalDayTimeBuilder, IntervalYearMonthBuilder } from './builder/interval.js'; export { DurationBuilder, DurationSecondBuilder, DurationMillisecondBuilder, DurationMicrosecondBuilder, DurationNanosecondBuilder } from './builder/duration.js'; export { Utf8Builder } from './builder/utf8.js'; +export { LargeUtf8Builder } from './builder/largeutf8.js'; export { BinaryBuilder } from './builder/binary.js'; export { ListBuilder } from './builder/list.js'; export { FixedSizeListBuilder } from './builder/fixedsizelist.js'; diff --git a/js/src/builder.ts b/js/src/builder.ts index 93510eedf84ff..1a4c52f871bbf 100644 --- a/js/src/builder.ts +++ b/js/src/builder.ts @@ -22,7 +22,7 @@ import { DataType, strideForType, Float, Int, Decimal, FixedSizeBinary, Date_, Time, Timestamp, Interval, Duration, - Utf8, Binary, List, Map_, + Utf8, LargeUtf8, Binary, List, Map_, } from './type.js'; import { createIsValidFunction } from './builder/valid.js'; import { BufferBuilder, BitmapBufferBuilder, DataBufferBuilder, OffsetsBufferBuilder } from './builder/buffer.js'; @@ -198,10 +198,10 @@ export abstract class Builder { return this.children.reduce((size, child) => size + child.reservedByteLength, size); } - declare protected _offsets: DataBufferBuilder; + declare protected _offsets: DataBufferBuilder; public get valueOffsets() { return this._offsets ? this._offsets.buffer : null; } - declare protected _values: BufferBuilder; + declare protected _values: BufferBuilder; public get values() { return this._values ? this._values.buffer : null; } declare protected _nulls: BitmapBufferBuilder; @@ -277,18 +277,15 @@ export abstract class Builder { * @returns A `Data` of the buffers and children representing the values written. */ public flush(): Data { - - let data; - let typeIds; - let nullBitmap; - let valueOffsets; + let data: BufferBuilder | undefined; + let typeIds: Int8Array; + let nullBitmap: Uint8Array | undefined; + let valueOffsets: T['TOffsetArray']; const { type, length, nullCount, _typeIds, _offsets, _values, _nulls } = this; - if (typeIds = _typeIds?.flush(length)) { // Unions - // DenseUnions + if (typeIds = _typeIds?.flush(length)) { // Unions, DenseUnions valueOffsets = _offsets?.flush(length); - } else if (valueOffsets = _offsets?.flush(length)) { // Variable-width primitives (Binary, Utf8), and Lists - // Binary, Utf8 + } else if (valueOffsets = _offsets?.flush(length)) { // Variable-width primitives (Binary, Utf8, LargeUtf8), and Lists data = _values?.flush(_offsets.last()); } else { // Fixed-width primitives (Int, Float, Decimal, Time, Timestamp, Duration and Interval) data = _values?.flush(length); @@ -355,13 +352,13 @@ export abstract class FixedWidthBuilder extends Builder { +export abstract class VariableWidthBuilder extends Builder { protected _pendingLength = 0; - protected _offsets: OffsetsBufferBuilder; + protected _offsets: OffsetsBufferBuilder; protected _pending: Map | undefined; constructor(opts: BuilderOptions) { super(opts); - this._offsets = new OffsetsBufferBuilder(); + this._offsets = new OffsetsBufferBuilder(opts.type); } public setValue(index: number, value: T['TValue']) { const pending = this._pending || (this._pending = new Map()); diff --git a/js/src/builder/buffer.ts b/js/src/builder/buffer.ts index 03d4f33349a7a..402172059682c 100644 --- a/js/src/builder/buffer.ts +++ b/js/src/builder/buffer.ts @@ -16,32 +16,21 @@ // under the License. 
import { memcpy } from '../util/buffer.js'; -import { - TypedArray, TypedArrayConstructor, - BigIntArray, BigIntArrayConstructor -} from '../interfaces.js'; - -/** @ignore */ type DataValue = T extends TypedArray ? number : T extends BigIntArray ? WideValue : T; -/** @ignore */ type WideValue = T extends BigIntArray ? bigint | Int32Array | Uint32Array : never; -/** @ignore */ type ArrayCtor = - T extends TypedArray ? TypedArrayConstructor : - T extends BigIntArray ? BigIntArrayConstructor : - any; +import { TypedArray, BigIntArray, ArrayCtor } from '../interfaces.js'; +import { DataType } from '../type.js'; /** @ignore */ -const roundLengthUpToNearest64Bytes = (len: number, BPE: number) => ((((Math.ceil(len) * BPE) + 63) & ~63) || 64) / BPE; +function roundLengthUpToNearest64Bytes(len: number, BPE: number) { + const bytesMinus1 = Math.ceil(len) * BPE - 1; + return ((bytesMinus1 - bytesMinus1 % 64 + 64) || 64) / BPE; +} /** @ignore */ const sliceOrExtendArray = (arr: T, len = 0) => ( arr.length >= len ? arr.subarray(0, len) : memcpy(new (arr.constructor as any)(len), arr, 0) ) as T; /** @ignore */ -export interface BufferBuilder> { - readonly offset: number; -} - -/** @ignore */ -export class BufferBuilder> { +export class BufferBuilder { constructor(buffer: T, stride = 1) { this.buffer = buffer; @@ -64,8 +53,8 @@ export class BufferBuilder 0) { this.length += extra; @@ -97,13 +86,11 @@ export class BufferBuilder extends BufferBuilder { +export class DataBufferBuilder extends BufferBuilder { public last() { return this.get(this.length - 1); } - public get(index: number) { return this.buffer[index]; } - public set(index: number, value: number) { + public get(index: number): T[0] { return this.buffer[index]; } + public set(index: number, value: T[0]) { this.reserve(index - this.length + 1); this.buffer[index * this.stride] = value; return this; @@ -134,15 +121,18 @@ export class BitmapBufferBuilder extends DataBufferBuilder { } /** @ignore */ -export class OffsetsBufferBuilder extends DataBufferBuilder { - constructor(data = new Int32Array(1)) { super(data, 1); } - public append(value: number) { +export class OffsetsBufferBuilder extends DataBufferBuilder { + constructor(type: T) { + super(new type.OffsetArrayType(1), 1); + } + + public append(value: T['TOffsetArray'][0]) { return this.set(this.length - 1, value); } - public set(index: number, value: number) { + public set(index: number, value: T['TOffsetArray'][0]) { const offset = this.length - 1; const buffer = this.reserve(index - offset + 1).buffer; - if (offset < index++) { + if (offset < index++ && offset >= 0) { buffer.fill(buffer[offset], offset, index); } buffer[index] = buffer[index - 1] + value; @@ -150,7 +140,7 @@ export class OffsetsBufferBuilder extends DataBufferBuilder { } public flush(length = this.length - 1) { if (length > this.length) { - this.set(length - 1, 0); + this.set(length - 1, this.BYTES_PER_ELEMENT > 4 ? BigInt(0) : 0); } return super.flush(length + 1); } diff --git a/js/src/builder/largeutf8.ts b/js/src/builder/largeutf8.ts new file mode 100644 index 0000000000000..fddfeaf8e7b17 --- /dev/null +++ b/js/src/builder/largeutf8.ts @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { LargeUtf8 } from '../type.js'; +import { encodeUtf8 } from '../util/utf8.js'; +import { BufferBuilder } from './buffer.js'; +import { VariableWidthBuilder, BuilderOptions } from '../builder.js'; + +/** @ignore */ +export class LargeUtf8Builder extends VariableWidthBuilder { + constructor(opts: BuilderOptions) { + super(opts); + this._values = new BufferBuilder(new Uint8Array(0)); + } + public get byteLength(): number { + let size = this._pendingLength + (this.length * 4); + this._offsets && (size += this._offsets.byteLength); + this._values && (size += this._values.byteLength); + this._nulls && (size += this._nulls.byteLength); + return size; + } + public setValue(index: number, value: string) { + return super.setValue(index, encodeUtf8(value) as any); + } + // @ts-ignore + // TODO: move to largeBinaryBuilder when implemented + // protected _flushPending(pending: Map, pendingLength: number): void { } + protected _flushPending(pending: Map, pendingLength: number) { + const offsets = this._offsets; + const data = this._values.reserve(pendingLength).buffer; + let offset = 0; + for (const [index, value] of pending) { + if (value === undefined) { + offsets.set(index, BigInt(0)); + } else { + const length = value.length; + data.set(value, offset); + offsets.set(index, BigInt(length)); + offset += length; + } + } + } +} + +// (LargeUtf8Builder.prototype as any)._flushPending = (LargeBinaryBuilder.prototype as any)._flushPending; diff --git a/js/src/builder/list.ts b/js/src/builder/list.ts index d83cac8e7b1c6..b2739cd5a3260 100644 --- a/js/src/builder/list.ts +++ b/js/src/builder/list.ts @@ -22,10 +22,10 @@ import { Builder, BuilderOptions, VariableWidthBuilder } from '../builder.js'; /** @ignore */ export class ListBuilder extends VariableWidthBuilder, TNull> { - protected _offsets: OffsetsBufferBuilder; + protected _offsets: OffsetsBufferBuilder>; constructor(opts: BuilderOptions, TNull>) { super(opts); - this._offsets = new OffsetsBufferBuilder(); + this._offsets = new OffsetsBufferBuilder(opts.type); } public addChild(child: Builder, name = '0') { if (this.numChildren > 0) { diff --git a/js/src/data.ts b/js/src/data.ts index 1e9df71cff8a7..145ee9d049cb4 100644 --- a/js/src/data.ts +++ b/js/src/data.ts @@ -17,7 +17,7 @@ import { Vector } from './vector.js'; import { BufferType, Type, UnionMode } from './enum.js'; -import { DataType, strideForType } from './type.js'; +import { DataType, LargeUtf8, strideForType } from './type.js'; import { popcnt_bit_range, truncateBitmap } from './util/bit.js'; // When slicing, we do not know the null count of the sliced range without @@ -30,11 +30,12 @@ import { popcnt_bit_range, truncateBitmap } from './util/bit.js'; /** @ignore */ export type NullBuffer = Uint8Array | null | undefined; /** @ignore */ export type TypeIdsBuffer = Int8Array | ArrayLike | Iterable | undefined; /** @ignore */ export type ValueOffsetsBuffer = Int32Array | ArrayLike | Iterable | undefined; 
+/** @ignore */ export type LargeValueOffsetsBuffer = BigInt64Array | ArrayLike | Iterable | undefined; /** @ignore */ export type DataBuffer = T['TArray'] | ArrayLike | Iterable | undefined; /** @ignore */ export interface Buffers { - [BufferType.OFFSET]: Int32Array; + [BufferType.OFFSET]: T['TOffsetArray']; [BufferType.DATA]: T['TArray']; [BufferType.VALIDITY]: Uint8Array; [BufferType.TYPE]: T['TArray']; @@ -264,7 +265,7 @@ import { } from './type.js'; import { Visitor } from './visitor.js'; -import { toArrayBufferView, toInt32Array, toUint8Array } from './util/buffer.js'; +import { toArrayBufferView, toBigInt64Array, toInt32Array, toUint8Array } from './util/buffer.js'; class MakeDataVisitor extends Visitor { public visit(props: any): Data { @@ -307,6 +308,14 @@ class MakeDataVisitor extends Visitor { const { ['length']: length = valueOffsets.length - 1, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0 } = props; return new Data(type, offset, length, nullCount, [valueOffsets, data, nullBitmap]); } + public visitLargeUtf8(props: LargeUtf8DataProps) { + const { ['type']: type, ['offset']: offset = 0 } = props; + const data = toUint8Array(props['data']); + const nullBitmap = toUint8Array(props['nullBitmap']); + const valueOffsets = toBigInt64Array(props['valueOffsets']); + const { ['length']: length = valueOffsets.length - 1, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0 } = props; + return new Data(type, offset, length, nullCount, [valueOffsets, data, nullBitmap]); + } public visitBinary(props: BinaryDataProps) { const { ['type']: type, ['offset']: offset = 0 } = props; const data = toUint8Array(props['data']); @@ -436,6 +445,7 @@ interface DurationDataProps extends DataProps_ { data?: D interface FixedSizeBinaryDataProps extends DataProps_ { data?: DataBuffer } interface BinaryDataProps extends DataProps_ { valueOffsets: ValueOffsetsBuffer; data?: DataBuffer } interface Utf8DataProps extends DataProps_ { valueOffsets: ValueOffsetsBuffer; data?: DataBuffer } +interface LargeUtf8DataProps extends DataProps_ { valueOffsets: LargeValueOffsetsBuffer | ValueOffsetsBuffer; data?: DataBuffer } interface ListDataProps extends DataProps_ { valueOffsets: ValueOffsetsBuffer; child: Data } interface FixedSizeListDataProps extends DataProps_ { child: Data } interface StructDataProps extends DataProps_ { children: Data[] } @@ -459,6 +469,7 @@ export type DataProps = ( T extends FixedSizeBinary /* */ ? FixedSizeBinaryDataProps : T extends Binary /* */ ? BinaryDataProps : T extends Utf8 /* */ ? Utf8DataProps : + T extends LargeUtf8 /* */ ? LargeUtf8DataProps : T extends List /* */ ? ListDataProps : T extends FixedSizeList /* */ ? FixedSizeListDataProps : T extends Struct /* */ ? StructDataProps : @@ -485,6 +496,7 @@ export function makeData(props: DurationDataProps): Data< export function makeData(props: FixedSizeBinaryDataProps): Data; export function makeData(props: BinaryDataProps): Data; export function makeData(props: Utf8DataProps): Data; +export function makeData(props: LargeUtf8DataProps): Data; export function makeData(props: ListDataProps): Data; export function makeData(props: FixedSizeListDataProps): Data; export function makeData(props: StructDataProps): Data; diff --git a/js/src/enum.ts b/js/src/enum.ts index 2a82dd4235c51..764ea64e63338 100644 --- a/js/src/enum.ts +++ b/js/src/enum.ts @@ -137,8 +137,7 @@ export enum MessageHeader { * nested type consisting of other data types, or another data type (e.g. a * timestamp encoded as an int64). 
* - * **Note**: Only enum values 0-18 (NONE through Duration) are written to an Arrow - * IPC payload. + * **Note**: Only non-negative enum values are written to an Arrow IPC payload. * * The rest of the values are specified here so TypeScript can narrow the type * signatures further beyond the base Arrow Types. The Arrow DataTypes include @@ -175,6 +174,7 @@ export enum Type { FixedSizeList = 16, /** Fixed-size list. Each value occupies the same number of bytes */ Map = 17, /** Map of named logical types */ Duration = 18, /** Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds. */ + LargeUtf8 = 20, /** Large variable-length string as List */ Dictionary = -1, /** Dictionary aka Category type */ Int8 = -2, @@ -205,7 +205,7 @@ export enum Type { DurationSecond = -27, DurationMillisecond = -28, DurationMicrosecond = -29, - DurationNanosecond = -30 + DurationNanosecond = -30, } export enum BufferType { diff --git a/js/src/interfaces.ts b/js/src/interfaces.ts index 95c5adbb2a25e..707d01bb14cca 100644 --- a/js/src/interfaces.ts +++ b/js/src/interfaces.ts @@ -33,6 +33,7 @@ import type { TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuil import type { IntervalBuilder, IntervalDayTimeBuilder, IntervalYearMonthBuilder } from './builder/interval.js'; import type { DurationBuilder, DurationSecondBuilder, DurationMillisecondBuilder, DurationMicrosecondBuilder, DurationNanosecondBuilder } from './builder/duration.js'; import type { Utf8Builder } from './builder/utf8.js'; +import type { LargeUtf8Builder } from './builder/largeutf8.js'; import type { BinaryBuilder } from './builder/binary.js'; import type { ListBuilder } from './builder/list.js'; import type { FixedSizeListBuilder } from './builder/fixedsizelist.js'; @@ -98,6 +99,12 @@ export interface BigIntArrayConstructor { from(arrayLike: ArrayLike, mapfn: (v: U, k: number) => bigint, thisArg?: any): T; } +/** @ignore */ +export type ArrayCtor = + T extends TypedArray ? TypedArrayConstructor : + T extends BigIntArray ? BigIntArrayConstructor : + any; + /** @ignore */ export type BuilderCtorArgs< T extends BuilderType, @@ -105,7 +112,7 @@ export type BuilderCtorArgs< TArgs extends any[] = any[], TCtor extends new (type: R, ...args: TArgs) => T = new (type: R, ...args: TArgs) => T - > = TCtor extends new (type: R, ...args: infer TArgs) => T ? TArgs : never; +> = TCtor extends new (type: R, ...args: infer TArgs) => T ? TArgs : never; /** * Obtain the constructor function of an instance type @@ -115,7 +122,7 @@ export type ConstructorType< T, TCtor extends new (...args: any[]) => T = new (...args: any[]) => T - > = TCtor extends new (...args: any[]) => T ? TCtor : never; +> = TCtor extends new (...args: any[]) => T ? TCtor : never; /** @ignore */ export type BuilderCtorType< @@ -123,7 +130,7 @@ export type BuilderCtorType< R extends DataType = any, TCtor extends new (options: BuilderOptions) => T = new (options: BuilderOptions) => T - > = TCtor extends new (options: BuilderOptions) => T ? TCtor : never; +> = TCtor extends new (options: BuilderOptions) => T ? 
TCtor : never; /** @ignore */ export type BuilderType = @@ -201,6 +208,7 @@ export type TypeToDataType = { [Type.Float64]: type.Float64; [Type.Float]: type.Float; [Type.Utf8]: type.Utf8; + [Type.LargeUtf8]: type.LargeUtf8; [Type.Binary]: type.Binary; [Type.FixedSizeBinary]: type.FixedSizeBinary; [Type.Date]: type.Date_; @@ -254,6 +262,7 @@ type TypeToBuilder = { [Type.Float64]: Float64Builder; [Type.Float]: FloatBuilder; [Type.Utf8]: Utf8Builder; + [Type.LargeUtf8]: LargeUtf8Builder; [Type.Binary]: BinaryBuilder; [Type.FixedSizeBinary]: FixedSizeBinaryBuilder; [Type.Date]: DateBuilder; @@ -307,6 +316,7 @@ type DataTypeToBuilder = { [Type.Float64]: T extends type.Float64 ? Float64Builder : never; [Type.Float]: T extends type.Float ? FloatBuilder : never; [Type.Utf8]: T extends type.Utf8 ? Utf8Builder : never; + [Type.LargeUtf8]: T extends type.LargeUtf8 ? LargeUtf8Builder : never; [Type.Binary]: T extends type.Binary ? BinaryBuilder : never; [Type.FixedSizeBinary]: T extends type.FixedSizeBinary ? FixedSizeBinaryBuilder : never; [Type.Date]: T extends type.Date_ ? DateBuilder : never; @@ -329,11 +339,11 @@ type DataTypeToBuilder = { [Type.Interval]: T extends type.Interval ? IntervalBuilder : never; [Type.IntervalDayTime]: T extends type.IntervalDayTime ? IntervalDayTimeBuilder : never; [Type.IntervalYearMonth]: T extends type.IntervalYearMonth ? IntervalYearMonthBuilder : never; - [Type.Duration]: T extends type.Duration ? DurationBuilder: never; + [Type.Duration]: T extends type.Duration ? DurationBuilder : never; [Type.DurationSecond]: T extends type.DurationSecond ? DurationSecondBuilder : never; [Type.DurationMillisecond]: T extends type.DurationMillisecond ? DurationMillisecondBuilder : never; - [Type.DurationMicrosecond]: T extends type.DurationMicrosecond ? DurationMicrosecondBuilder: never; - [Type.DurationNanosecond]: T extends type.DurationNanosecond ? DurationNanosecondBuilder: never; + [Type.DurationMicrosecond]: T extends type.DurationMicrosecond ? DurationMicrosecondBuilder : never; + [Type.DurationNanosecond]: T extends type.DurationNanosecond ? DurationNanosecondBuilder : never; [Type.Map]: T extends type.Map_ ? MapBuilder : never; [Type.List]: T extends type.List ? ListBuilder : never; [Type.Struct]: T extends type.Struct ? 
StructBuilder : never; diff --git a/js/src/ipc/metadata/json.ts b/js/src/ipc/metadata/json.ts index f1f306730ddba..b669c0c612f8a 100644 --- a/js/src/ipc/metadata/json.ts +++ b/js/src/ipc/metadata/json.ts @@ -20,7 +20,7 @@ import { Schema, Field } from '../../schema.js'; import { DataType, Dictionary, TimeBitWidth, - Utf8, Binary, Decimal, FixedSizeBinary, + Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Union, Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys, Duration, } from '../../type.js'; @@ -150,6 +150,7 @@ function typeFromJSON(f: any, children?: Field[]): DataType { case 'null': return new Null(); case 'binary': return new Binary(); case 'utf8': return new Utf8(); + case 'largeutf8': return new LargeUtf8(); case 'bool': return new Bool(); case 'list': return new List((children || [])[0]); case 'struct': return new Struct(children || []); diff --git a/js/src/ipc/metadata/message.ts b/js/src/ipc/metadata/message.ts index 27c9b92d6897b..cf05bff54cfba 100644 --- a/js/src/ipc/metadata/message.ts +++ b/js/src/ipc/metadata/message.ts @@ -56,7 +56,7 @@ import ByteBuffer = flatbuffers.ByteBuffer; import { DataType, Dictionary, TimeBitWidth, - Utf8, Binary, Decimal, FixedSizeBinary, + Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Union, Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys, Duration, } from '../../type.js'; @@ -433,6 +433,7 @@ function decodeFieldType(f: _Field, children?: Field[]): DataType { case Type['Null']: return new Null(); case Type['Binary']: return new Binary(); case Type['Utf8']: return new Utf8(); + case Type['LargeUtf8']: return new LargeUtf8(); case Type['Bool']: return new Bool(); case Type['List']: return new List((children || [])[0]); case Type['Struct_']: return new Struct(children || []); diff --git a/js/src/type.ts b/js/src/type.ts index 34bbf45bca728..6223d0316f17a 100644 --- a/js/src/type.ts +++ b/js/src/type.ts @@ -19,7 +19,7 @@ import { Field } from './schema.js'; import { Vector } from './vector.js'; import { MapRow } from './row/map.js'; import { StructRow, StructRowProxy } from './row/struct.js'; -import { TypedArrayConstructor } from './interfaces.js'; +import { ArrayCtor, BigIntArrayConstructor, TypedArrayConstructor } from './interfaces.js'; import { bigIntToNumber } from './util/bigint.js'; import { @@ -38,9 +38,11 @@ export type IsSigned = { 'true': true; 'false': false }; export interface DataType { readonly TType: TType; readonly TArray: any; + readonly TOffsetArray: any; readonly TValue: any; readonly TChildren: TChildren; readonly ArrayType: any; + readonly OffsetArrayType: ArrayCtor; readonly children: Field[]; } @@ -57,6 +59,7 @@ export abstract class DataType { (proto).children = null; (proto).ArrayType = Array; + (proto).OffsetArrayType = Int32Array; return proto[Symbol.toStringTag] = 'DataType'; })(DataType.prototype); } @@ -232,7 +236,7 @@ Object.defineProperty(Float32.prototype, 'ArrayType', { value: Float32Array }); Object.defineProperty(Float64.prototype, 'ArrayType', { value: Float64Array }); /** @ignore */ -export interface Binary extends DataType { TArray: Uint8Array; TValue: Uint8Array; ArrayType: TypedArrayConstructor } +export interface Binary extends DataType { TArray: Uint8Array; TOffsetArray: Int32Array; TValue: Uint8Array; ArrayType: TypedArrayConstructor; OffsetArrayType: TypedArrayConstructor } /** @ignore */ export class Binary extends DataType { constructor() { @@ -247,7 
+251,7 @@ export class Binary extends DataType { } /** @ignore */ -export interface Utf8 extends DataType { TArray: Uint8Array; TValue: string; ArrayType: TypedArrayConstructor } +export interface Utf8 extends DataType { TArray: Uint8Array; TOffsetArray: Int32Array; TValue: string; ArrayType: TypedArrayConstructor; OffsetArrayType: TypedArrayConstructor } /** @ignore */ export class Utf8 extends DataType { constructor() { @@ -261,6 +265,22 @@ export class Utf8 extends DataType { })(Utf8.prototype); } +/** @ignore */ +export interface LargeUtf8 extends DataType { TArray: Uint8Array; TOffsetArray: BigInt64Array; TValue: string; ArrayType: TypedArrayConstructor; OffsetArrayType: BigIntArrayConstructor } +/** @ignore */ +export class LargeUtf8 extends DataType { + constructor() { + super(); + } + public get typeId() { return Type.LargeUtf8 as Type.LargeUtf8; } + public toString() { return `LargeUtf8`; } + protected static [Symbol.toStringTag] = ((proto: LargeUtf8) => { + (proto).ArrayType = Uint8Array; + (proto).OffsetArrayType = BigInt64Array; + return proto[Symbol.toStringTag] = 'LargeUtf8'; + })(LargeUtf8.prototype); +} + /** @ignore */ export interface Bool extends DataType { TArray: Uint8Array; TValue: boolean; ArrayType: TypedArrayConstructor } /** @ignore */ @@ -458,13 +478,13 @@ export class Duration extends DataType { } /** @ignore */ -export class DurationSecond extends Duration { constructor() { super(TimeUnit.SECOND); }} +export class DurationSecond extends Duration { constructor() { super(TimeUnit.SECOND); } } /** @ignore */ -export class DurationMillisecond extends Duration { constructor() { super(TimeUnit.MILLISECOND); }} +export class DurationMillisecond extends Duration { constructor() { super(TimeUnit.MILLISECOND); } } /** @ignore */ -export class DurationMicrosecond extends Duration { constructor() { super(TimeUnit.MICROSECOND); }} +export class DurationMicrosecond extends Duration { constructor() { super(TimeUnit.MICROSECOND); } } /** @ignore */ -export class DurationNanosecond extends Duration { constructor() { super(TimeUnit.NANOSECOND); }} +export class DurationNanosecond extends Duration { constructor() { super(TimeUnit.NANOSECOND); } } /** @ignore */ @@ -581,6 +601,7 @@ export class FixedSizeBinary extends DataType { protected static [Symbol.toStringTag] = ((proto: FixedSizeBinary) => { (proto).byteWidth = null; (proto).ArrayType = Uint8Array; + (proto).OffsetArrayType = Int32Array; return proto[Symbol.toStringTag] = 'FixedSizeBinary'; })(FixedSizeBinary.prototype); } diff --git a/js/src/util/buffer.ts b/js/src/util/buffer.ts index dd8edf11f9258..4f4379dedf6d8 100644 --- a/js/src/util/buffer.ts +++ b/js/src/util/buffer.ts @@ -83,9 +83,9 @@ export function joinUint8Arrays(chunks: Uint8Array[], size?: number | null): [Ui } /** @ignore */ -export type ArrayBufferViewInput = ArrayBufferView | ArrayBufferLike | ArrayBufferView | Iterable | ArrayLike | ByteBuffer | string | null | undefined | - IteratorResult | ArrayLike | ByteBuffer | string | null | undefined> | - ReadableStreamReadResult | ArrayLike | ByteBuffer | string | null | undefined>; +export type ArrayBufferViewInput = ArrayBufferView | ArrayBufferLike | ArrayBufferView | Iterable | Iterable | ArrayLike | ArrayLike | ByteBuffer | string | null | undefined | + IteratorResult | Iterable | ArrayLike | ArrayLike | ByteBuffer | string | null | undefined> | + ReadableStreamReadResult | Iterable | ArrayLike | ArrayLike | ByteBuffer | string | null | undefined>; /** @ignore */ export function toArrayBufferView< @@ -208,7 
+208,9 @@ export async function* toArrayBufferViewAsyncIterator(Arra /** @ignore */ export const toUint8ClampedArrayAsyncIterator = (input: ArrayBufferViewAsyncIteratorInput) => toArrayBufferViewAsyncIterator(Uint8ClampedArray, input); /** @ignore */ -export function rebaseValueOffsets(offset: number, length: number, valueOffsets: Int32Array) { +export function rebaseValueOffsets(offset: number, length: number, valueOffsets: Int32Array): Int32Array; +export function rebaseValueOffsets(offset: number, length: number, valueOffsets: BigInt64Array): BigInt64Array; +export function rebaseValueOffsets(offset: number, length: number, valueOffsets: any) { // If we have a non-zero offset, create a new offsets array with the values // shifted by the start offset, such that the new start offset is 0 if (offset !== 0) { diff --git a/js/src/visitor.ts b/js/src/visitor.ts index c63640b038e47..5b3cc4d3d0593 100644 --- a/js/src/visitor.ts +++ b/js/src/visitor.ts @@ -36,6 +36,7 @@ export abstract class Visitor { public visitInt(_node: any, ..._args: any[]): any { return null; } public visitFloat(_node: any, ..._args: any[]): any { return null; } public visitUtf8(_node: any, ..._args: any[]): any { return null; } + public visitLargeUtf8(_node: any, ..._args: any[]): any { return null; } public visitBinary(_node: any, ..._args: any[]): any { return null; } public visitFixedSizeBinary(_node: any, ..._args: any[]): any { return null; } public visitDate(_node: any, ..._args: any[]): any { return null; } @@ -89,6 +90,7 @@ function getVisitFnByTypeId(visitor: Visitor, dtype: Type, throwIfNotFound = tru case Type.Float32: fn = visitor.visitFloat32 || visitor.visitFloat; break; case Type.Float64: fn = visitor.visitFloat64 || visitor.visitFloat; break; case Type.Utf8: fn = visitor.visitUtf8; break; + case Type.LargeUtf8: fn = visitor.visitLargeUtf8; break; case Type.Binary: fn = visitor.visitBinary; break; case Type.FixedSizeBinary: fn = visitor.visitFixedSizeBinary; break; case Type.Date: fn = visitor.visitDate; break; @@ -152,6 +154,7 @@ function inferDType(type: T): Type { return Type.Float; case Type.Binary: return Type.Binary; case Type.Utf8: return Type.Utf8; + case Type.LargeUtf8: return Type.LargeUtf8; case Type.Bool: return Type.Bool; case Type.Decimal: return Type.Decimal; case Type.Time: @@ -229,6 +232,7 @@ export interface Visitor { visitFloat32?(node: any, ...args: any[]): any; visitFloat64?(node: any, ...args: any[]): any; visitUtf8(node: any, ...args: any[]): any; + visitLargeUtf8(node: any, ...args: any[]): any; visitBinary(node: any, ...args: any[]): any; visitFixedSizeBinary(node: any, ...args: any[]): any; visitDate(node: any, ...args: any[]): any; diff --git a/js/src/visitor/builderctor.ts b/js/src/visitor/builderctor.ts index 54b5610a50eed..83374712b2642 100644 --- a/js/src/visitor/builderctor.ts +++ b/js/src/visitor/builderctor.ts @@ -40,6 +40,7 @@ import { TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder, import { TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder } from '../builder/time.js'; import { UnionBuilder, DenseUnionBuilder, SparseUnionBuilder } from '../builder/union.js'; import { Utf8Builder } from '../builder/utf8.js'; +import { LargeUtf8Builder } from '../builder/largeutf8.js'; /** @ignore */ export interface GetBuilderCtor extends Visitor { @@ -67,6 +68,7 @@ export class GetBuilderCtor extends Visitor { public visitFloat32() { return Float32Builder; } public visitFloat64() { return Float64Builder; } public 
visitUtf8() { return Utf8Builder; } + public visitLargeUtf8() { return LargeUtf8Builder; } public visitBinary() { return BinaryBuilder; } public visitFixedSizeBinary() { return FixedSizeBinaryBuilder; } public visitDate() { return DateBuilder; } diff --git a/js/src/visitor/bytelength.ts b/js/src/visitor/bytelength.ts index 72d6148a52fd8..c3bfadd50e155 100644 --- a/js/src/visitor/bytelength.ts +++ b/js/src/visitor/bytelength.ts @@ -26,7 +26,7 @@ import { Type, TimeUnit, UnionMode } from '../enum.js'; import { DataType, Dictionary, Float, Int, Date_, Interval, Time, Timestamp, Duration, - Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, + Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Union, DenseUnion, SparseUnion, } from '../type.js'; @@ -40,6 +40,7 @@ export interface GetByteLengthVisitor extends Visitor { getVisitFn(node: T): (data: Data>, index: number) => number; visitBinary(data: Data, index: number): number; visitUtf8(data: Data, index: number): number; + visitLargeUtf8(data: Data, index: number): number; visitList(data: Data, index: number): number; visitDenseUnion(data: Data, index: number): number; visitSparseUnion(data: Data, index: number): number; diff --git a/js/src/visitor/get.ts b/js/src/visitor/get.ts index 5aaaedf51a37e..a801c90047c89 100644 --- a/js/src/visitor/get.ts +++ b/js/src/visitor/get.ts @@ -21,6 +21,7 @@ import { Vector } from '../vector.js'; import { Visitor } from '../visitor.js'; import { MapRow } from '../row/map.js'; import { StructRow, StructRowProxy } from '../row/struct.js'; +import { bigIntToNumber } from '../util/bigint.js'; import { decodeUtf8 } from '../util/utf8.js'; import { TypeToDataType } from '../interfaces.js'; import { uint16ToFloat64 } from '../util/math.js'; @@ -35,7 +36,7 @@ import { Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond, Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond, Duration, DurationSecond, DurationMillisecond, DurationMicrosecond, DurationNanosecond, - Union, DenseUnion, SparseUnion, + Union, DenseUnion, SparseUnion, LargeUtf8, } from '../type.js'; /** @ignore */ @@ -60,6 +61,7 @@ export interface GetVisitor extends Visitor { visitFloat32(data: Data, index: number): T['TValue'] | null; visitFloat64(data: Data, index: number): T['TValue'] | null; visitUtf8(data: Data, index: number): T['TValue'] | null; + visitLargeUtf8(data: Data, index: number): T['TValue'] | null; visitBinary(data: Data, index: number): T['TValue'] | null; visitFixedSizeBinary(data: Data, index: number): T['TValue'] | null; visitDate(data: Data, index: number): T['TValue'] | null; @@ -122,6 +124,15 @@ const getVariableWidthBytes = (values: Uint8Array, valueOffsets: Int32Array, ind const y = valueOffsets[index + 1]; return values.subarray(x, y); }; +/** @ignore */ +const getLargeVariableWidthBytes = (values: Uint8Array, valueOffsets: BigInt64Array, index: number) => { + if (index + 1 >= valueOffsets.length) { + return null as any; + } + const x = bigIntToNumber(valueOffsets[index]); + const y = bigIntToNumber(valueOffsets[index + 1]); + return values.subarray(x, y); +}; /** @ignore */ const getBool = ({ offset, values }: Data, index: number): T['TValue'] => { @@ -155,6 +166,11 @@ const getUtf8 = ({ values, valueOffsets }: Data, index: numbe const bytes = getVariableWidthBytes(values, valueOffsets, index); return bytes !== null ? 
decodeUtf8(bytes) : null as any; }; +/** @ignore */ +const getLargeUtf8 = ({ values, valueOffsets }: Data, index: number): T['TValue'] => { + const bytes = getLargeVariableWidthBytes(values, valueOffsets, index); + return bytes !== null ? decodeUtf8(bytes) : null as any; +}; /* istanbul ignore next */ /** @ignore */ @@ -328,6 +344,7 @@ GetVisitor.prototype.visitFloat16 = wrapGet(getFloat16); GetVisitor.prototype.visitFloat32 = wrapGet(getNumeric); GetVisitor.prototype.visitFloat64 = wrapGet(getNumeric); GetVisitor.prototype.visitUtf8 = wrapGet(getUtf8); +GetVisitor.prototype.visitLargeUtf8 = wrapGet(getLargeUtf8); GetVisitor.prototype.visitBinary = wrapGet(getBinary); GetVisitor.prototype.visitFixedSizeBinary = wrapGet(getFixedSizeBinary); GetVisitor.prototype.visitDate = wrapGet(getDate); diff --git a/js/src/visitor/indexof.ts b/js/src/visitor/indexof.ts index 4cf0076b3c8e2..76f95788c7953 100644 --- a/js/src/visitor/indexof.ts +++ b/js/src/visitor/indexof.ts @@ -24,7 +24,7 @@ import { getBool, BitIterator } from '../util/bit.js'; import { createElementComparator } from '../util/vector.js'; import { DataType, Dictionary, - Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, @@ -57,6 +57,7 @@ export interface IndexOfVisitor extends Visitor { visitFloat32(data: Data, value: T['TValue'] | null, index?: number): number; visitFloat64(data: Data, value: T['TValue'] | null, index?: number): number; visitUtf8(data: Data, value: T['TValue'] | null, index?: number): number; + visitLargeUtf8(data: Data, value: T['TValue'] | null, index?: number): number; visitBinary(data: Data, value: T['TValue'] | null, index?: number): number; visitFixedSizeBinary(data: Data, value: T['TValue'] | null, index?: number): number; visitDate(data: Data, value: T['TValue'] | null, index?: number): number; @@ -172,6 +173,7 @@ IndexOfVisitor.prototype.visitFloat16 = indexOfValue; IndexOfVisitor.prototype.visitFloat32 = indexOfValue; IndexOfVisitor.prototype.visitFloat64 = indexOfValue; IndexOfVisitor.prototype.visitUtf8 = indexOfValue; +IndexOfVisitor.prototype.visitLargeUtf8 = indexOfValue; IndexOfVisitor.prototype.visitBinary = indexOfValue; IndexOfVisitor.prototype.visitFixedSizeBinary = indexOfValue; IndexOfVisitor.prototype.visitDate = indexOfValue; diff --git a/js/src/visitor/iterator.ts b/js/src/visitor/iterator.ts index e38bb907695d0..09dfcb0b565ae 100644 --- a/js/src/visitor/iterator.ts +++ b/js/src/visitor/iterator.ts @@ -21,7 +21,7 @@ import { Type, Precision } from '../enum.js'; import { TypeToDataType } from '../interfaces.js'; import { DataType, Dictionary, - Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, @@ -55,6 +55,7 @@ export interface IteratorVisitor extends Visitor { visitFloat32(vector: Vector): IterableIterator; visitFloat64(vector: Vector): IterableIterator; visitUtf8(vector: Vector): IterableIterator; + visitLargeUtf8(vector: Vector): IterableIterator; visitBinary(vector: Vector): IterableIterator; visitFixedSizeBinary(vector: Vector): IterableIterator; visitDate(vector: 
Vector): IterableIterator; @@ -158,6 +159,7 @@ IteratorVisitor.prototype.visitFloat16 = vectorIterator; IteratorVisitor.prototype.visitFloat32 = vectorIterator; IteratorVisitor.prototype.visitFloat64 = vectorIterator; IteratorVisitor.prototype.visitUtf8 = vectorIterator; +IteratorVisitor.prototype.visitLargeUtf8 = vectorIterator; IteratorVisitor.prototype.visitBinary = vectorIterator; IteratorVisitor.prototype.visitFixedSizeBinary = vectorIterator; IteratorVisitor.prototype.visitDate = vectorIterator; diff --git a/js/src/visitor/jsontypeassembler.ts b/js/src/visitor/jsontypeassembler.ts index 6e6cfb07413c3..a6746a858ecb4 100644 --- a/js/src/visitor/jsontypeassembler.ts +++ b/js/src/visitor/jsontypeassembler.ts @@ -48,6 +48,9 @@ export class JSONTypeAssembler extends Visitor { public visitUtf8({ typeId }: T) { return { 'name': ArrowType[typeId].toLowerCase() }; } + public visitLargeUtf8({ typeId }: T) { + return { 'name': ArrowType[typeId].toLowerCase() }; + } public visitDecimal({ typeId, scale, precision, bitWidth }: T) { return { 'name': ArrowType[typeId].toLowerCase(), 'scale': scale, 'precision': precision, 'bitWidth': bitWidth }; } @@ -64,7 +67,7 @@ export class JSONTypeAssembler extends Visitor { return { 'name': ArrowType[typeId].toLowerCase(), 'unit': IntervalUnit[unit] }; } public visitDuration({ typeId, unit }: T) { - return { 'name': ArrowType[typeId].toLocaleLowerCase(), 'unit': TimeUnit[unit]}; + return { 'name': ArrowType[typeId].toLocaleLowerCase(), 'unit': TimeUnit[unit] }; } public visitList({ typeId }: T) { return { 'name': ArrowType[typeId].toLowerCase() }; diff --git a/js/src/visitor/jsonvectorassembler.ts b/js/src/visitor/jsonvectorassembler.ts index 0af954e4adacc..9a3cb8601a434 100644 --- a/js/src/visitor/jsonvectorassembler.ts +++ b/js/src/visitor/jsonvectorassembler.ts @@ -27,7 +27,7 @@ import { BitIterator, getBit, getBool } from '../util/bit.js'; import { DataType, Float, Int, Date_, Interval, Time, Timestamp, Union, Duration, - Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, IntArray, + Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, IntArray, LargeUtf8, } from '../type.js'; /** @ignore */ @@ -42,6 +42,7 @@ export interface JSONVectorAssembler extends Visitor { visitInt(data: Data): { DATA: number[] | string[] }; visitFloat(data: Data): { DATA: number[] }; visitUtf8(data: Data): { DATA: string[]; OFFSET: number[] }; + visitLargeUtf8(data: Data): { DATA: string[]; OFFSET: string[] }; visitBinary(data: Data): { DATA: string[]; OFFSET: number[] }; visitFixedSizeBinary(data: Data): { DATA: string[] }; visitDate(data: Data): { DATA: number[] }; @@ -100,6 +101,9 @@ export class JSONVectorAssembler extends Visitor { public visitUtf8(data: Data) { return { 'DATA': [...new Vector([data])], 'OFFSET': [...data.valueOffsets] }; } + public visitLargeUtf8(data: Data) { + return { 'DATA': [...new Vector([data])], 'OFFSET': [...bigNumsToStrings(data.valueOffsets, 2)] }; + } public visitBinary(data: Data) { return { 'DATA': [...binaryToString(new Vector([data]))], OFFSET: [...data.valueOffsets] }; } @@ -148,7 +152,7 @@ export class JSONVectorAssembler extends Visitor { return { 'DATA': [...data.values] }; } public visitDuration(data: Data) { - return { 'DATA': [...bigNumsToStrings(data.values, 2)]}; + return { 'DATA': [...bigNumsToStrings(data.values, 2)] }; } public visitFixedSizeList(data: Data) { return { diff --git a/js/src/visitor/set.ts b/js/src/visitor/set.ts index 1a0eddc556899..a439ec8311fd6 
100644 --- a/js/src/visitor/set.ts +++ b/js/src/visitor/set.ts @@ -19,13 +19,14 @@ import { Data } from '../data.js'; import { Field } from '../schema.js'; import { Vector } from '../vector.js'; import { Visitor } from '../visitor.js'; +import { bigIntToNumber } from '../util/bigint.js'; import { encodeUtf8 } from '../util/utf8.js'; import { TypeToDataType } from '../interfaces.js'; import { float64ToUint16 } from '../util/math.js'; import { Type, UnionMode, Precision, DateUnit, TimeUnit, IntervalUnit } from '../enum.js'; import { DataType, Dictionary, - Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, @@ -58,6 +59,7 @@ export interface SetVisitor extends Visitor { visitFloat32(data: Data, index: number, value: T['TValue']): void; visitFloat64(data: Data, index: number, value: T['TValue']): void; visitUtf8(data: Data, index: number, value: T['TValue']): void; + visitLargeUtf8(data: Data, index: number, value: T['TValue']): void; visitBinary(data: Data, index: number, value: T['TValue']): void; visitFixedSizeBinary(data: Data, index: number, value: T['TValue']): void; visitDate(data: Data, index: number, value: T['TValue']): void; @@ -123,9 +125,19 @@ export const setEpochMsToNanosecondsLong = (data: Int32Array, index: number, epo }; /** @ignore */ -export const setVariableWidthBytes = (values: Uint8Array, valueOffsets: Int32Array, index: number, value: Uint8Array) => { +export const setVariableWidthBytes = (values: Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => { if (index + 1 < valueOffsets.length) { - const { [index]: x, [index + 1]: y } = valueOffsets; + const x = valueOffsets[index]; + const y = valueOffsets[index + 1]; + values.set(value.subarray(0, y - x), x); + } +}; + +/** @ignore */ +export const setLargeVariableWidthBytes = (values: Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => { + if (index + 1 < valueOffsets.length) { + const x = bigIntToNumber(valueOffsets[index]); + const y = bigIntToNumber(valueOffsets[index + 1]); values.set(value.subarray(0, y - x), x); } }; @@ -167,6 +179,10 @@ const setBinary = ({ values, valueOffsets }: Data, index: n const setUtf8 = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => { setVariableWidthBytes(values, valueOffsets, index, encodeUtf8(value)); }; +/** @ignore */ +const setLargeUtf8 = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => { + setLargeVariableWidthBytes(values, valueOffsets, index, encodeUtf8(value)); +}; /* istanbul ignore next */ export const setDate = (data: Data, index: number, value: T['TValue']): void => { @@ -365,6 +381,7 @@ SetVisitor.prototype.visitFloat16 = wrapSet(setFloat16); SetVisitor.prototype.visitFloat32 = wrapSet(setFloat); SetVisitor.prototype.visitFloat64 = wrapSet(setFloat); SetVisitor.prototype.visitUtf8 = wrapSet(setUtf8); +SetVisitor.prototype.visitLargeUtf8 = wrapSet(setLargeUtf8); SetVisitor.prototype.visitBinary = wrapSet(setBinary); SetVisitor.prototype.visitFixedSizeBinary = wrapSet(setFixedSizeBinary); SetVisitor.prototype.visitDate = wrapSet(setDate); diff --git a/js/src/visitor/typeassembler.ts b/js/src/visitor/typeassembler.ts index c2262d20531b9..f072714222739 100644 --- a/js/src/visitor/typeassembler.ts +++ b/js/src/visitor/typeassembler.ts @@ -27,6 +27,7 
@@ import { FloatingPoint } from '../fb/floating-point.js'; import { Binary } from '../fb/binary.js'; import { Bool } from '../fb/bool.js'; import { Utf8 } from '../fb/utf8.js'; +import { LargeUtf8 } from '../fb/large-utf8.js'; import { Decimal } from '../fb/decimal.js'; import { Date } from '../fb/date.js'; import { Time } from '../fb/time.js'; @@ -78,6 +79,10 @@ export class TypeAssembler extends Visitor { Utf8.startUtf8(b); return Utf8.endUtf8(b); } + public visitLargeUtf8(_node: T, b: Builder) { + LargeUtf8.startLargeUtf8(b); + return LargeUtf8.endLargeUtf8(b); + } public visitDecimal(node: T, b: Builder) { Decimal.startDecimal(b); Decimal.addScale(b, node.scale); diff --git a/js/src/visitor/typecomparator.ts b/js/src/visitor/typecomparator.ts index 1de8e218dae4f..2417dec09c6e9 100644 --- a/js/src/visitor/typecomparator.ts +++ b/js/src/visitor/typecomparator.ts @@ -21,7 +21,7 @@ import { Visitor } from '../visitor.js'; import { Schema, Field } from '../schema.js'; import { DataType, TypeMap, Dictionary, - Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, @@ -53,6 +53,7 @@ export interface TypeComparator extends Visitor { visitFloat32(type: T, other?: DataType | null): other is T; visitFloat64(type: T, other?: DataType | null): other is T; visitUtf8(type: T, other?: DataType | null): other is T; + visitLargeUtf8(type: T, other?: DataType | null): other is T; visitBinary(type: T, other?: DataType | null): other is T; visitFixedSizeBinary(type: T, other?: DataType | null): other is T; visitDate(type: T, other?: DataType | null): other is T; @@ -249,6 +250,7 @@ TypeComparator.prototype.visitFloat16 = compareFloat; TypeComparator.prototype.visitFloat32 = compareFloat; TypeComparator.prototype.visitFloat64 = compareFloat; TypeComparator.prototype.visitUtf8 = compareAny; +TypeComparator.prototype.visitLargeUtf8 = compareAny; TypeComparator.prototype.visitBinary = compareAny; TypeComparator.prototype.visitFixedSizeBinary = compareFixedSizeBinary; TypeComparator.prototype.visitDate = compareDate; diff --git a/js/src/visitor/typector.ts b/js/src/visitor/typector.ts index 077f66592fbfb..2e0bbc4147abb 100644 --- a/js/src/visitor/typector.ts +++ b/js/src/visitor/typector.ts @@ -49,6 +49,7 @@ export class GetDataTypeConstructor extends Visitor { public visitFloat32() { return type.Float32; } public visitFloat64() { return type.Float64; } public visitUtf8() { return type.Utf8; } + public visitLargeUtf8() { return type.LargeUtf8; } public visitBinary() { return type.Binary; } public visitFixedSizeBinary() { return type.FixedSizeBinary; } public visitDate() { return type.Date_; } diff --git a/js/src/visitor/vectorassembler.ts b/js/src/visitor/vectorassembler.ts index 949463272e718..7a9d3bdd57b0d 100644 --- a/js/src/visitor/vectorassembler.ts +++ b/js/src/visitor/vectorassembler.ts @@ -27,8 +27,9 @@ import { BufferRegion, FieldNode } from '../ipc/metadata/message.js'; import { DataType, Dictionary, Float, Int, Date_, Interval, Time, Timestamp, Union, Duration, - Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, LargeUtf8, } from '../type.js'; +import { bigIntToNumber } from '../util/bigint.js'; /** @ignore */ export 
interface VectorAssembler extends Visitor { @@ -204,9 +205,29 @@ function assembleFlatVector(this: VectorAssembler, data: Data) { const { length, values, valueOffsets } = data; const { [0]: begin, [length]: end } = valueOffsets; + return _assembleFlatListVector.call(this, length, begin, end, values, valueOffsets); +} + +/** @ignore */ +function assembleLargeFlatListVector(this: VectorAssembler, data: Data) { + const { length, values, valueOffsets } = data; + const begin = bigIntToNumber(valueOffsets[0]); + const end = bigIntToNumber(valueOffsets[length]); + return _assembleFlatListVector.call(this, length, begin, end, values, valueOffsets); +} + +/** @ignore */ +function _assembleFlatListVector( + this: VectorAssembler, + length: number, + begin: number, + end: number, + values: T['TArray'], + valueOffsets: T['TOffsetArray'] +) { const byteLength = Math.min(end - begin, values.byteLength - begin); // Push in the order FlatList types read their buffers - addBuffer.call(this, rebaseValueOffsets(-begin, length + 1, valueOffsets)); // valueOffsets buffer first + addBuffer.call(this, rebaseValueOffsets(-begin, length + 1, valueOffsets as any)); // valueOffsets buffer first addBuffer.call(this, values.subarray(begin, begin + byteLength)); // sliced values buffer second return this; } @@ -234,6 +255,7 @@ VectorAssembler.prototype.visitBool = assembleBoolVector; VectorAssembler.prototype.visitInt = assembleFlatVector; VectorAssembler.prototype.visitFloat = assembleFlatVector; VectorAssembler.prototype.visitUtf8 = assembleFlatListVector; +VectorAssembler.prototype.visitLargeUtf8 = assembleLargeFlatListVector; VectorAssembler.prototype.visitBinary = assembleFlatListVector; VectorAssembler.prototype.visitFixedSizeBinary = assembleFlatVector; VectorAssembler.prototype.visitDate = assembleFlatVector; diff --git a/js/src/visitor/vectorloader.ts b/js/src/visitor/vectorloader.ts index db34edad9a1c1..35f28f49baada 100644 --- a/js/src/visitor/vectorloader.ts +++ b/js/src/visitor/vectorloader.ts @@ -71,6 +71,9 @@ export class VectorLoader extends Visitor { public visitUtf8(type: T, { length, nullCount } = this.nextFieldNode()) { return makeData({ type, length, nullCount, nullBitmap: this.readNullBitmap(type, nullCount), valueOffsets: this.readOffsets(type), data: this.readData(type) }); } + public visitLargeUtf8(type: T, { length, nullCount } = this.nextFieldNode()) { + return makeData({ type, length, nullCount, nullBitmap: this.readNullBitmap(type, nullCount), valueOffsets: this.readOffsets(type), data: this.readData(type) }); + } public visitBinary(type: T, { length, nullCount } = this.nextFieldNode()) { return makeData({ type, length, nullCount, nullBitmap: this.readNullBitmap(type, nullCount), valueOffsets: this.readOffsets(type), data: this.readData(type) }); } @@ -151,7 +154,7 @@ export class JSONVectorLoader extends VectorLoader { return nullCount <= 0 ? 
new Uint8Array(0) : packBools(this.sources[offset]); } protected readOffsets(_type: T, { offset } = this.nextBufferRange()) { - return toArrayBufferView(Uint8Array, toArrayBufferView(Int32Array, this.sources[offset])); + return toArrayBufferView(Uint8Array, toArrayBufferView(_type.OffsetArrayType, this.sources[offset])); } protected readTypeIds(type: T, { offset } = this.nextBufferRange()) { return toArrayBufferView(Uint8Array, toArrayBufferView(type.ArrayType, this.sources[offset])); @@ -170,7 +173,7 @@ export class JSONVectorLoader extends VectorLoader { return binaryDataFromJSON(sources[offset] as string[]); } else if (DataType.isBool(type)) { return packBools(sources[offset] as number[]); - } else if (DataType.isUtf8(type)) { + } else if (DataType.isUtf8(type) || DataType.isLargeUtf8(type)) { return encodeUtf8((sources[offset] as string[]).join('')); } return toArrayBufferView(Uint8Array, toArrayBufferView(type.ArrayType, sources[offset].map((x) => +x))); diff --git a/js/test/data/tables.ts b/js/test/data/tables.ts index 28aed7e4feccf..449cfe1fb853a 100644 --- a/js/test/data/tables.ts +++ b/js/test/data/tables.ts @@ -27,7 +27,7 @@ const nestedVectorGeneratorNames = ['struct', 'denseUnion', 'sparseUnion', 'map' const dictionaryKeyGeneratorNames = ['int8', 'int16', 'int32', 'uint8', 'uint16', 'uint32']; const valueVectorGeneratorNames = [ 'null_', 'bool', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64', - 'float16', 'float32', 'float64', 'utf8', 'binary', 'fixedSizeBinary', 'dateDay', 'dateMillisecond', + 'float16', 'float32', 'float64', 'utf8', 'largeUtf8', 'binary', 'fixedSizeBinary', 'dateDay', 'dateMillisecond', 'timestampSecond', 'timestampMillisecond', 'timestampMicrosecond', 'timestampNanosecond', 'timeSecond', 'timeMillisecond', 'timeMicrosecond', 'timeNanosecond', 'decimal', 'dictionary', 'intervalDayTime', 'intervalYearMonth', diff --git a/js/test/generate-test-data.ts b/js/test/generate-test-data.ts index 15fb715a31f95..9d7b038331fe6 100644 --- a/js/test/generate-test-data.ts +++ b/js/test/generate-test-data.ts @@ -24,7 +24,7 @@ import { Bool, Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64, Float, Float16, Float32, Float64, - Utf8, + Utf8, LargeUtf8, Binary, FixedSizeBinary, Date_, DateDay, DateMillisecond, @@ -52,6 +52,7 @@ interface TestDataVectorGenerator extends Visitor { visit(type: T, length?: number, nullCount?: number): GeneratedVector; visit(type: T, length?: number, nullCount?: number): GeneratedVector; visit(type: T, length?: number, nullCount?: number): GeneratedVector; + visit(type: T, length?: number, nullCount?: number): GeneratedVector; visit(type: T, length?: number, nullCount?: number): GeneratedVector; visit(type: T, length?: number, nullCount?: number): GeneratedVector; visit(type: T, length?: number, nullCount?: number): GeneratedVector; @@ -75,6 +76,7 @@ interface TestDataVectorGenerator extends Visitor { visitUint64: typeof generateBigInt; visitFloat: typeof generateFloat; visitUtf8: typeof generateUtf8; + visitLargeUtf8: typeof generateLargeUtf8; visitBinary: typeof generateBinary; visitFixedSizeBinary: typeof generateFixedSizeBinary; visitDate: typeof generateDate; @@ -100,6 +102,7 @@ TestDataVectorGenerator.prototype.visitInt64 = generateBigInt; TestDataVectorGenerator.prototype.visitUint64 = generateBigInt; TestDataVectorGenerator.prototype.visitFloat = generateFloat; TestDataVectorGenerator.prototype.visitUtf8 = generateUtf8; +TestDataVectorGenerator.prototype.visitLargeUtf8 = generateLargeUtf8; 
TestDataVectorGenerator.prototype.visitBinary = generateBinary; TestDataVectorGenerator.prototype.visitFixedSizeBinary = generateFixedSizeBinary; TestDataVectorGenerator.prototype.visitDate = generateDate; @@ -214,6 +217,7 @@ export const float16 = (length = 100, nullCount = Math.trunc(length * 0.2)) => v export const float32 = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new Float32(), length, nullCount); export const float64 = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new Float64(), length, nullCount); export const utf8 = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new Utf8(), length, nullCount); +export const largeUtf8 = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new LargeUtf8(), length, nullCount); export const binary = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new Binary(), length, nullCount); export const fixedSizeBinary = (length = 100, nullCount = Math.trunc(length * 0.2), byteWidth = 8) => vectorGenerator.visit(new FixedSizeBinary(byteWidth), length, nullCount); export const dateDay = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new DateDay(), length, nullCount); @@ -242,7 +246,7 @@ export const fixedSizeList = (length = 100, nullCount = Math.trunc(length * 0.2) export const map = (length = 100, nullCount = Math.trunc(length * 0.2), child: Field> = defaultMapChild()) => vectorGenerator.visit(new Map_(child), length, nullCount); export const vecs = { - null_, bool, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32, float64, utf8, binary, fixedSizeBinary, dateDay, dateMillisecond, timestampSecond, timestampMillisecond, timestampMicrosecond, timestampNanosecond, timeSecond, timeMillisecond, timeMicrosecond, timeNanosecond, decimal, list, struct, denseUnion, sparseUnion, dictionary, intervalDayTime, intervalYearMonth, fixedSizeList, map, durationSecond, durationMillisecond, durationMicrosecond, durationNanosecond + null_, bool, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32, float64, utf8, largeUtf8, binary, fixedSizeBinary, dateDay, dateMillisecond, timestampSecond, timestampMillisecond, timestampMicrosecond, timestampNanosecond, timeSecond, timeMillisecond, timeMicrosecond, timeNanosecond, decimal, list, struct, denseUnion, sparseUnion, dictionary, intervalDayTime, intervalYearMonth, fixedSizeList, map, durationSecond, durationMillisecond, durationMicrosecond, durationNanosecond } as { [k: string]: (...args: any[]) => any }; function generateNull(this: TestDataVectorGenerator, type: T, length = 100): GeneratedVector { @@ -312,7 +316,7 @@ function generateFloat(this: TestDataVectorGenerator, type: T, function generateUtf8(this: TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length * 0.2)): GeneratedVector { const nullBitmap = createBitmap(length, nullCount); - const valueOffsets = createVariableWidthOffsets(length, nullBitmap, 10, 20, nullCount != 0); + const valueOffsets = createVariableWidthOffsets32(length, nullBitmap, 10, 20, nullCount != 0); const values: string[] = new Array(valueOffsets.length - 1).fill(null); [...valueOffsets.slice(1)] .map((o, i) => isValid(nullBitmap, i) ? 
o - valueOffsets[i] : null) @@ -332,9 +336,31 @@ function generateUtf8(this: TestDataVectorGenerator, type: T, le return { values: () => values, vector: new Vector([makeData({ type, length, nullCount, nullBitmap, valueOffsets, data })]) }; } +function generateLargeUtf8(this: TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length * 0.2)): GeneratedVector { + const nullBitmap = createBitmap(length, nullCount); + const valueOffsets = createVariableWidthOffsets64(length, nullBitmap, 10, 20, nullCount != 0); + const values: string[] = new Array(valueOffsets.length - 1).fill(null); + [...valueOffsets.slice(1)] + .map((o, i) => isValid(nullBitmap, i) ? o - valueOffsets[i] : null) + .reduce((map, length, i) => { + if (length !== null) { + if (length > 0) { + do { + values[i] = randomString(Number(length)); + } while (map.has(values[i])); + return map.set(values[i], i); + } + values[i] = ''; + } + return map; + }, new Map()); + const data = createVariableWidthBytes(length, nullBitmap, valueOffsets, (i) => encodeUtf8(values[i])); + return { values: () => values, vector: new Vector([makeData({ type, length, nullCount, nullBitmap, valueOffsets, data })]) }; +} + function generateBinary(this: TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length * 0.2)): GeneratedVector { const nullBitmap = createBitmap(length, nullCount); - const valueOffsets = createVariableWidthOffsets(length, nullBitmap, 10, 20, nullCount != 0); + const valueOffsets = createVariableWidthOffsets32(length, nullBitmap, 10, 20, nullCount != 0); const values = [...valueOffsets.slice(1)] .map((o, i) => isValid(nullBitmap, i) ? o - valueOffsets[i] : null) .map((length) => length == null ? null : randomBytes(length)); @@ -443,7 +469,7 @@ function generateList(this: TestDataVectorGenerator, type: T, le const childVec = child.vector; const nullBitmap = createBitmap(length, nullCount); const stride = childVec.length / (length - nullCount); - const valueOffsets = createVariableWidthOffsets(length, nullBitmap, stride, stride); + const valueOffsets = createVariableWidthOffsets32(length, nullBitmap, stride, stride); const values = memoize(() => { const childValues = child.values(); const values: (T['valueType'] | null)[] = [...valueOffsets.slice(1)] @@ -581,7 +607,7 @@ function generateMap(this: TestDataVectorGenerator, const childVec = child.vector; const nullBitmap = createBitmap(length, nullCount); const stride = childVec.length / (length - nullCount); - const valueOffsets = createVariableWidthOffsets(length, nullBitmap, stride, stride); + const valueOffsets = createVariableWidthOffsets32(length, nullBitmap, stride, stride); const values = memoize(() => { const childValues: { key: K; value: V }[] = child.values(); const values: (Record | null)[] = [...valueOffsets.slice(1)] @@ -660,7 +686,7 @@ function createBitmap(length: number, nullCount: number) { return bytes; } -function createVariableWidthOffsets(length: number, nullBitmap: Uint8Array, min = 10, max = Number.POSITIVE_INFINITY, allowEmpty = true) { +function createVariableWidthOffsets32(length: number, nullBitmap: Uint8Array, min = 10, max = Number.POSITIVE_INFINITY, allowEmpty = true) { const offsets = new Int32Array(length + 1); iterateBitmap(length, nullBitmap, (i, valid) => { if (!valid) { @@ -674,10 +700,24 @@ function createVariableWidthOffsets(length: number, nullBitmap: Uint8Array, min return offsets; } -function createVariableWidthBytes(length: number, nullBitmap: Uint8Array, offsets: Int32Array, getBytes: (index: number) => 
Uint8Array) { - const bytes = new Uint8Array(offsets[length]); +function createVariableWidthOffsets64(length: number, nullBitmap: Uint8Array, min = 10, max = Number.POSITIVE_INFINITY, allowEmpty = true) { + const offsets = new BigInt64Array(length + 1); + iterateBitmap(length, nullBitmap, (i, valid) => { + if (!valid) { + offsets[i + 1] = offsets[i]; + } else { + do { + offsets[i + 1] = offsets[i] + BigInt(Math.min(max, Math.max(min, Math.trunc(rand() * max)))); + } while (!allowEmpty && offsets[i + 1] === offsets[i]); + } + }); + return offsets; +} + +function createVariableWidthBytes(length: number, nullBitmap: Uint8Array, offsets: Int32Array | BigInt64Array, getBytes: (index: number) => Uint8Array) { + const bytes = new Uint8Array(Number(offsets[length])); iterateBitmap(length, nullBitmap, (i, valid) => { - valid && bytes.set(getBytes(i), offsets[i]); + valid && bytes.set(getBytes(i), Number(offsets[i])); }); return bytes; } diff --git a/js/test/unit/builders/builder-tests.ts b/js/test/unit/builders/builder-tests.ts index b261e4f815e3a..0137c7aa66635 100644 --- a/js/test/unit/builders/builder-tests.ts +++ b/js/test/unit/builders/builder-tests.ts @@ -44,6 +44,7 @@ describe('Generated Test Data', () => { describe('Float32Builder', () => { validateBuilder(generate.float32); }); describe('Float64Builder', () => { validateBuilder(generate.float64); }); describe('Utf8Builder', () => { validateBuilder(generate.utf8); }); + describe('LargeUtf8Builder', () => { validateBuilder(generate.largeUtf8); }); describe('BinaryBuilder', () => { validateBuilder(generate.binary); }); describe('FixedSizeBinaryBuilder', () => { validateBuilder(generate.fixedSizeBinary); }); describe('DateDayBuilder', () => { validateBuilder(generate.dateDay); }); diff --git a/js/test/unit/builders/largeUtf8-tests.ts b/js/test/unit/builders/largeUtf8-tests.ts new file mode 100644 index 0000000000000..c789d5dbb1671 --- /dev/null +++ b/js/test/unit/builders/largeUtf8-tests.ts @@ -0,0 +1,65 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import 'web-streams-polyfill'; + +import { validateVector } from './utils.js'; +import { + encodeAll, + encodeEach, + encodeEachDOM, + encodeEachNode, + stringsNoNulls, + stringsWithNAs, + stringsWithNulls, + stringsWithEmpties +} from './utils.js'; + +import { Vector, LargeUtf8 } from 'apache-arrow'; + +const testDOMStreams = process.env.TEST_DOM_STREAMS === 'true'; +const testNodeStreams = process.env.TEST_NODE_STREAMS === 'true'; + +describe('LargeUtf8Builder', () => { + runTestsWithEncoder('encodeAll', encodeAll(() => new LargeUtf8())); + runTestsWithEncoder('encodeEach: 5', encodeEach(() => new LargeUtf8(), 5)); + runTestsWithEncoder('encodeEach: 25', encodeEach(() => new LargeUtf8(), 25)); + runTestsWithEncoder('encodeEach: undefined', encodeEach(() => new LargeUtf8(), void 0)); + testDOMStreams && runTestsWithEncoder('encodeEachDOM: 25', encodeEachDOM(() => new LargeUtf8(), 25)); + testNodeStreams && runTestsWithEncoder('encodeEachNode: 25', encodeEachNode(() => new LargeUtf8(), 25)); +}); + +function runTestsWithEncoder(name: string, encode: (vals: (string | null)[], nullVals?: any[]) => Promise>) { + describe(`${encode.name} ${name}`, () => { + it(`encodes strings no nulls`, async () => { + const vals = stringsNoNulls(20); + validateVector(vals, await encode(vals, []), []); + }); + it(`encodes strings with nulls`, async () => { + const vals = stringsWithNulls(20); + validateVector(vals, await encode(vals, [null]), [null]); + }); + it(`encodes strings using n/a as the null value rep`, async () => { + const vals = stringsWithNAs(20); + validateVector(vals, await encode(vals, ['n/a']), ['n/a']); + }); + it(`encodes strings using \\0 as the null value rep`, async () => { + const vals = stringsWithEmpties(20); + validateVector(vals, await encode(vals, ['\0']), ['\0']); + }); + }); +} diff --git a/js/test/unit/generated-data-tests.ts b/js/test/unit/generated-data-tests.ts index d64c7c188d3ed..0a06bcbab8ee0 100644 --- a/js/test/unit/generated-data-tests.ts +++ b/js/test/unit/generated-data-tests.ts @@ -38,6 +38,7 @@ describe('Generated Test Data', () => { describe('Float32', () => { validateVector(generate.float32()); }); describe('Float64', () => { validateVector(generate.float64()); }); describe('Utf8', () => { validateVector(generate.utf8()); }); + describe('LargeUtf8', () => { validateVector(generate.largeUtf8()); }); describe('Binary', () => { validateVector(generate.binary()); }); describe('FixedSizeBinary', () => { validateVector(generate.fixedSizeBinary()); }); describe('DateDay', () => { validateVector(generate.dateDay()); }); diff --git a/js/test/unit/generated-data-validators.ts b/js/test/unit/generated-data-validators.ts index 52f642d2a6e89..57ee94876c300 100644 --- a/js/test/unit/generated-data-validators.ts +++ b/js/test/unit/generated-data-validators.ts @@ -113,7 +113,9 @@ function vectorTests(values: any[], vector: Vector, keys?: number[]) { expected = values[i]; expect(actual).toArrowCompare(expected); } - } catch (e: any) { throw new Error(`${vector}[${i}]:\n\t${e && e.stack || e}`); } + } catch (e: any) { + throw new Error(`${vector}[${i}]:\n\t${e && e.stack || e}`); + } }); if (keys && keys.length > 0) { test(`dictionary indices should match`, () => { @@ -126,7 +128,9 @@ function vectorTests(values: any[], vector: Vector, keys?: number[]) { ? 
expect(indices.get(i)).toBe(keys[i]) : expect(indices.get(i)).toBeNull(); } - } catch (e) { throw new Error(`${indices}[${i}]: ${e}`); } + } catch (e) { + throw new Error(`${indices}[${i}]: ${e}`); + } }); } test(`sets expected values`, () => { @@ -139,7 +143,9 @@ function vectorTests(values: any[], vector: Vector, keys?: number[]) { actual = vector.get(i); expect(actual).toArrowCompare(expected); } - } catch (e: any) { throw new Error(`${vector}[${i}]:\n\t${e && e.stack || e}`); } + } catch (e: any) { + throw new Error(`${vector}[${i}]:\n\t${e && e.stack || e}`); + } }); test(`iterates expected values`, () => { expect.hasAssertions(); @@ -149,7 +155,9 @@ function vectorTests(values: any[], vector: Vector, keys?: number[]) { expected = values[++i]; expect(actual).toArrowCompare(expected); } - } catch (e: any) { throw new Error(`${vector}[${i}]:\n\t${e && e.stack || e}`); } + } catch (e: any) { + throw new Error(`${vector}[${i}]:\n\t${e && e.stack || e}`); + } }); test(`indexOf returns expected values`, () => { expect.hasAssertions(); @@ -169,7 +177,9 @@ function vectorTests(values: any[], vector: Vector, keys?: number[]) { expect(vector.indexOf('purple elephants')).toBe(-1); expect(vector.indexOf('whistling wombats')).toBe(-1); expect(vector.indexOf('carnivorous novices')).toBe(-1); - } catch (e: any) { throw new Error(`${vector}[${i}]:\n\t${e && e.stack || e}`); } + } catch (e: any) { + throw new Error(`${vector}[${i}]:\n\t${e && e.stack || e}`); + } }); } diff --git a/js/test/unit/vector/vector-tests.ts b/js/test/unit/vector/vector-tests.ts index a259cbef87772..bfcf0d8547861 100644 --- a/js/test/unit/vector/vector-tests.ts +++ b/js/test/unit/vector/vector-tests.ts @@ -16,7 +16,7 @@ // under the License. import { - Bool, DateDay, DateMillisecond, Dictionary, Float64, Int32, List, makeVector, Struct, Timestamp, TimeUnit, Utf8, util, Vector, vectorFromArray + Bool, DateDay, DateMillisecond, Dictionary, Float64, Int32, List, makeVector, Struct, Timestamp, TimeUnit, Utf8, LargeUtf8, util, Vector, vectorFromArray } from 'apache-arrow'; describe(`makeVectorFromArray`, () => { @@ -196,6 +196,28 @@ describe(`Utf8Vector`, () => { }); }); +describe(`LargeUtf8Vector`, () => { + const values = ['foo', 'bar', 'baz', 'foo bar', 'bar']; + const vector = vectorFromArray(values, new LargeUtf8); + + test(`has largeUtf8 type`, () => { + expect(vector.type).toBeInstanceOf(LargeUtf8); + }); + + test(`is not memoized`, () => { + expect(vector.isMemoized).toBe(false); + const memoizedVector = vector.memoize(); + expect(memoizedVector.isMemoized).toBe(true); + const unMemoizedVector = vector.unmemoize(); + expect(unMemoizedVector.isMemoized).toBe(false); + }); + + basicVectorTests(vector, values, ['abc', '123']); + describe(`sliced`, () => { + basicVectorTests(vector.slice(1, 3), values.slice(1, 3), ['foo', 'abc']); + }); +}); + describe(`ListVector`, () => { const values = [[1, 2], [1, 2, 3]]; const vector = vectorFromArray(values); diff --git a/js/test/unit/visitor-tests.ts b/js/test/unit/visitor-tests.ts index 8a7ba1ed778aa..f78adc59f8e98 100644 --- a/js/test/unit/visitor-tests.ts +++ b/js/test/unit/visitor-tests.ts @@ -18,7 +18,7 @@ import { Field, Visitor, DataType, Dictionary, - Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, @@ -36,6 +36,7 
@@ class BasicVisitor extends Visitor { public visitInt(type: T) { return (this.type = type); } public visitFloat(type: T) { return (this.type = type); } public visitUtf8(type: T) { return (this.type = type); } + public visitLargeUtf8(type: T) { return (this.type = type); } public visitBinary(type: T) { return (this.type = type); } public visitFixedSizeBinary(type: T) { return (this.type = type); } public visitDate(type: T) { return (this.type = type); } @@ -68,6 +69,7 @@ class FeatureVisitor extends Visitor { public visitFloat32(type: T) { return (this.type = type); } public visitFloat64(type: T) { return (this.type = type); } public visitUtf8(type: T) { return (this.type = type); } + public visitLargeUtf8(type: T) { return (this.type = type); } public visitBinary(type: T) { return (this.type = type); } public visitFixedSizeBinary(type: T) { return (this.type = type); } public visitDateDay(type: T) { return (this.type = type); } @@ -104,6 +106,7 @@ describe('Visitor', () => { test(`visits Int types`, () => validateBasicVisitor(new Int(true, 32))); test(`visits Float types`, () => validateBasicVisitor(new Float(0))); test(`visits Utf8 types`, () => validateBasicVisitor(new Utf8())); + test(`visits LargeUtf8 types`, () => validateBasicVisitor(new LargeUtf8())); test(`visits Binary types`, () => validateBasicVisitor(new Binary())); test(`visits FixedSizeBinary types`, () => validateBasicVisitor(new FixedSizeBinary(128))); test(`visits Date types`, () => validateBasicVisitor(new Date_(0))); @@ -144,6 +147,7 @@ describe('Visitor', () => { test(`visits Float32 types`, () => validateFeatureVisitor(new Float32())); test(`visits Float64 types`, () => validateFeatureVisitor(new Float64())); test(`visits Utf8 types`, () => validateFeatureVisitor(new Utf8())); + test(`visits LargeUtf8 types`, () => validateFeatureVisitor(new LargeUtf8())); test(`visits Binary types`, () => validateFeatureVisitor(new Binary())); test(`visits FixedSizeBinary types`, () => validateFeatureVisitor(new FixedSizeBinary(128))); test(`visits DateDay types`, () => validateFeatureVisitor(new DateDay())); From a91a11def5d6dc63463cd5ce0a7027f0174b5ac9 Mon Sep 17 00:00:00 2001 From: Carl Jackson Date: Sat, 16 Dec 2023 11:35:00 -0800 Subject: [PATCH 052/570] GH-37983: [JS] create nullable Fields in Table constructor (#37982) Previously, Tables constructed from vectors with null values would infer Schemas that did not permit null values. This resulted in downstream code making bad assumptions about the data. After this change, we always assume data can be nullable, and construct a Schema with nullable Fields. ### Are these changes tested? I informally tested these changes for my use case, but have not tested them more extensively ### Are there any user-facing changes? 
Yes: the `Table` constructor will now produce schemas with nullable `Fields` in situations in which it previously did not * Closes: #37983 --- js/src/table.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/src/table.ts b/js/src/table.ts index b3633aa9c3015..ef7d09a1d8f44 100644 --- a/js/src/table.ts +++ b/js/src/table.ts @@ -112,7 +112,7 @@ export class Table { } else if (typeof x === 'object') { const keys = Object.keys(x) as (keyof T)[]; const vecs = keys.map((k) => new Vector([x[k]])); - const schema = new Schema(keys.map((k, i) => new Field(String(k), vecs[i].type))); + const schema = new Schema(keys.map((k, i) => new Field(String(k), vecs[i].type, true))); const [, batches] = distributeVectorsIntoRecordBatches(schema, vecs); return batches.length === 0 ? [new RecordBatch(x)] : batches; } From d9183643c86eccc7a620017e00333fb9d555fae0 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Sat, 16 Dec 2023 14:50:59 -0500 Subject: [PATCH 053/570] Revert "GH-37983: [JS] create nullable Fields in Table constructor (#37982)" (#39253) I missed that the tests were failing. --- js/src/table.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/src/table.ts b/js/src/table.ts index ef7d09a1d8f44..b3633aa9c3015 100644 --- a/js/src/table.ts +++ b/js/src/table.ts @@ -112,7 +112,7 @@ export class Table { } else if (typeof x === 'object') { const keys = Object.keys(x) as (keyof T)[]; const vecs = keys.map((k) => new Vector([x[k]])); - const schema = new Schema(keys.map((k, i) => new Field(String(k), vecs[i].type, true))); + const schema = new Schema(keys.map((k, i) => new Field(String(k), vecs[i].type))); const [, batches] = distributeVectorsIntoRecordBatches(schema, vecs); return batches.length === 0 ? [new RecordBatch(x)] : batches; } From e43f575f4d21d66c1585d2b1be9a89963f5129b9 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Sun, 17 Dec 2023 15:08:38 -0500 Subject: [PATCH 054/570] GH-37983: [JS] Allow nullable fields in table when constructed from vector with nulls (#39254) --- js/src/table.ts | 2 +- js/test/unit/table-tests.ts | 31 ++++++++++++++++++++++--------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/js/src/table.ts b/js/src/table.ts index b3633aa9c3015..58518257b30cb 100644 --- a/js/src/table.ts +++ b/js/src/table.ts @@ -112,7 +112,7 @@ export class Table { } else if (typeof x === 'object') { const keys = Object.keys(x) as (keyof T)[]; const vecs = keys.map((k) => new Vector([x[k]])); - const schema = new Schema(keys.map((k, i) => new Field(String(k), vecs[i].type))); + const schema = new Schema(keys.map((k, i) => new Field(String(k), vecs[i].type, vecs[i].nullCount > 0))); const [, batches] = distributeVectorsIntoRecordBatches(schema, vecs); return batches.length === 0 ? 
[new RecordBatch(x)] : batches; } diff --git a/js/test/unit/table-tests.ts b/js/test/unit/table-tests.ts index 50c8565f0f144..6b34124abcaba 100644 --- a/js/test/unit/table-tests.ts +++ b/js/test/unit/table-tests.ts @@ -24,7 +24,7 @@ import { Schema, Field, Table, RecordBatch, Vector, builderThroughIterable, Float32, Int32, Dictionary, Utf8, Int8, - tableFromIPC, tableToIPC + tableFromIPC, tableToIPC, vectorFromArray } from 'apache-arrow'; const deepCopy = (t: Table) => tableFromIPC(tableToIPC(t)); @@ -104,7 +104,7 @@ describe(`Table`, () => { }); describe(`constructor`, () => { - test(`creates an empty Table with Columns`, () => { + test(`creates an empty Table with Vectors`, () => { let i32 = new Vector([makeData({ type: new Int32 })]); let f32 = new Vector([makeData({ type: new Float32 })]); const table = new Table({ i32, f32 }); @@ -117,8 +117,24 @@ describe(`Table`, () => { expect(f32.toArray()).toBeInstanceOf(Float32Array); }); - test(`creates a new Table from a Column`, () => { + test(`creates a Table with Vectors with Nulls`, () => { + const i32s = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, null]; + const i32 = vectorFromArray(i32s); + expect(i32).toHaveLength(i32s.length); + expect(i32.nullCount).toBe(1); + + const table = new Table({ i32 }); + const i32Field = table.schema.fields[0]; + + expect(i32Field.name).toBe('i32'); + expect(i32).toHaveLength(i32s.length); + expect(i32Field.nullable).toBe(true); + expect(i32.nullCount).toBe(1); + + expect(i32).toEqualVector(vectorFromArray(i32s)); + }); + test(`creates a new Table from a Typed Array`, () => { const i32s = new Int32Array(arange(new Array(10))); const i32 = makeVector([i32s]); expect(i32).toHaveLength(i32s.length); @@ -135,8 +151,7 @@ describe(`Table`, () => { expect(i32).toEqualVector(makeVector(i32s)); }); - test(`creates a new Table from Columns`, () => { - + test(`creates a new Table from Typed Arrays`, () => { const i32s = new Int32Array(arange(new Array(10))); const f32s = new Float32Array(arange(new Array(10))); @@ -164,8 +179,7 @@ describe(`Table`, () => { expect(f32).toEqualVector(makeVector(f32s)); }); - test(`creates a new Table from Columns with different lengths`, () => { - + test(`creates a new Table from Typed Arrays with different lengths`, () => { const i32s = new Int32Array(arange(new Array(20))); const f32s = new Float32Array(arange(new Array(8))); @@ -209,8 +223,7 @@ describe(`Table`, () => { expect(f32Vector).toEqualVector(new Vector([f32Expected])); }); - test(`creates a new Table from Columns with different lengths and number of inner chunks`, () => { - + test(`creates a new Table from Typed Arrays with different lengths and number of inner chunks`, () => { const i32s = new Int32Array(arange(new Array(20))); const f32s = new Float32Array(arange(new Array(16))); From 50ace0520748349dccc84f89e04bef0c289a4dba Mon Sep 17 00:00:00 2001 From: Stas Stepanov <78556261+stfdxv@users.noreply.github.com> Date: Mon, 18 Dec 2023 06:02:07 +0300 Subject: [PATCH 055/570] GH-38883: [Docs] Fix struct example to show hiding a child's entry (#38898) ### Rationale for this change See the issue. ### What changes are included in this PR? The struct example now demonstrates what it's supposed to. ### Are these changes tested? Renders well ### Are there any user-facing changes? Just docs. 
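As context for the docs change below: the rule being illustrated is that a struct child entry is only logically visible when both the struct's validity bit and the child's validity bit are set. A minimal sketch of that check follows; the helper names and the LSB-numbered bitmap assumption are mine, not part of this patch:

```ts
// Illustrative only: a struct child entry's effective validity is the AND of the
// struct's validity bit and the child's validity bit (LSB-numbered bitmaps assumed).
function bit(bitmap: Uint8Array, i: number): number {
    return (bitmap[i >> 3] >> (i % 8)) & 1;
}

function isChildEntryVisible(structValidity: Uint8Array, childValidity: Uint8Array, i: number): boolean {
    return (bit(structValidity, i) & bit(childValidity, i)) === 1;
}

// With the bitmaps from the corrected example (struct: 0b00001011, field-0: 0b00001101),
// entry 2 ('alice') is valid in the child array but hidden by the struct's null.
console.log(isChildEntryVisible(new Uint8Array([0b00001011]), new Uint8Array([0b00001101]), 2)); // false
```

This matches the corrected example in the diff, where `'alice'` exists in the child array yet never appears among the struct's logical values.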
* Closes: #38883 Authored-by: Stas Stepanov <78556261+stfdxv@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- docs/source/format/Columnar.rst | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/docs/source/format/Columnar.rst b/docs/source/format/Columnar.rst index a6632fa2cf81b..56cb27626a1f9 100644 --- a/docs/source/format/Columnar.rst +++ b/docs/source/format/Columnar.rst @@ -664,7 +664,9 @@ type. **Example Layout: ``Struct``** -The layout for ``[{'joe', 1}, {null, 2}, null, {'mark', 4}]`` would be: :: +The layout for ``[{'joe', 1}, {null, 2}, null, {'mark', 4}]``, having +child arrays ``['joe', null, 'alice', 'mark']`` and ``[1, 2, null, 4]`` +would be: :: * Length: 4, Null count: 1 * Validity bitmap buffer: @@ -675,24 +677,24 @@ The layout for ``[{'joe', 1}, {null, 2}, null, {'mark', 4}]`` would be: :: * Children arrays: * field-0 array (`VarBinary`): - * Length: 4, Null count: 2 + * Length: 4, Null count: 1 * Validity bitmap buffer: | Byte 0 (validity bitmap) | Bytes 1-63 | |--------------------------|-----------------------| - | 00001001 | 0 (padding) | + | 00001101 | 0 (padding) | * Offsets buffer: | Bytes 0-19 | Bytes 20-63 | |----------------|-----------------------| - | 0, 3, 3, 3, 7 | unspecified (padding) | + | 0, 3, 3, 8, 12 | unspecified (padding) | * Value buffer: - | Bytes 0-6 | Bytes 7-63 | + | Bytes 0-11 | Bytes 12-63 | |----------------|-----------------------| - | joemark | unspecified (padding) | + | joealicemark | unspecified (padding) | * field-1 array (int32 array): * Length: 4, Null count: 1 @@ -722,10 +724,10 @@ Therefore, to know whether a particular child entry is valid, one must take the logical AND of the corresponding bits in the two validity bitmaps (the struct array's and the child array's). -This is illustrated in the example above, the child arrays have valid entries -for the null struct but they are "hidden" by the struct array's validity -bitmap. However, when treated independently, corresponding entries of the -children array will be non-null. +This is illustrated in the example above, one of the child arrays has a +valid entry ``'alice'`` for the null struct but it is "hidden" by the +struct array's validity bitmap. However, when treated independently, +corresponding entries of the children array will be non-null. Union Layout ------------ From 9c097d504a6acc193a5ce0a4cbf3551c948dcf90 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Sun, 17 Dec 2023 22:19:54 -0500 Subject: [PATCH 056/570] GH-39248: [JS] Unify code paths for utf8 and largeUtf8 (#39249) Reduce the code size by using common code paths. We only call `Number` a few times on numbers, which should be a noop. 
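To make the idea concrete, here is a minimal TypeScript sketch of a single accessor serving both 32-bit and 64-bit offsets; `toNumber` and `getValueBytes` are hypothetical names standing in for the patch's actual helpers, not copies of them:

```ts
// Sketch only, not the patch code: one code path for Int32Array and BigInt64Array offsets.
const toNumber = (x: number | bigint): number => typeof x === 'bigint' ? Number(x) : x;

function getValueBytes(values: Uint8Array, valueOffsets: Int32Array | BigInt64Array, index: number): Uint8Array | null {
    if (index + 1 >= valueOffsets.length) {
        return null;
    }
    const begin = toNumber(valueOffsets[index]);     // effectively a no-op for 32-bit offsets
    const end = toNumber(valueOffsets[index + 1]);   // Number(bigint) for 64-bit offsets
    return values.subarray(begin, end);
}
```

Coercing with `Number` is essentially free when the value is already a number, which is why sharing one code path should not penalize the existing 32-bit types.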
* Closes: #39248 --- js/.vscode/settings.json | 2 +- js/src/visitor/get.ts | 20 +++----------------- js/src/visitor/set.ts | 19 +++---------------- js/src/visitor/vectorassembler.ts | 24 +++--------------------- 4 files changed, 10 insertions(+), 55 deletions(-) diff --git a/js/.vscode/settings.json b/js/.vscode/settings.json index 113a662180c3c..e52da54e544ec 100644 --- a/js/.vscode/settings.json +++ b/js/.vscode/settings.json @@ -2,7 +2,7 @@ "typescript.tsdk": "node_modules/typescript/lib", "editor.trimAutoWhitespace": true, "editor.codeActionsOnSave": { - "source.fixAll.eslint": false + "source.fixAll.eslint": "explicit" }, "[javascript]": { "editor.tabSize": 4, diff --git a/js/src/visitor/get.ts b/js/src/visitor/get.ts index a801c90047c89..112d2f2983e53 100644 --- a/js/src/visitor/get.ts +++ b/js/src/visitor/get.ts @@ -116,16 +116,7 @@ function wrapGet(fn: (data: Data, _1: any) => any) { /** @ignore */ const getNull = (_data: Data, _index: number): T['TValue'] => null; /** @ignore */ -const getVariableWidthBytes = (values: Uint8Array, valueOffsets: Int32Array, index: number) => { - if (index + 1 >= valueOffsets.length) { - return null as any; - } - const x = valueOffsets[index]; - const y = valueOffsets[index + 1]; - return values.subarray(x, y); -}; -/** @ignore */ -const getLargeVariableWidthBytes = (values: Uint8Array, valueOffsets: BigInt64Array, index: number) => { +const getVariableWidthBytes = (values: Uint8Array, valueOffsets: Int32Array | BigInt64Array, index: number) => { if (index + 1 >= valueOffsets.length) { return null as any; } @@ -162,15 +153,10 @@ const getFixedSizeBinary = ({ stride, values }: Data< /** @ignore */ const getBinary = ({ values, valueOffsets }: Data, index: number): T['TValue'] => getVariableWidthBytes(values, valueOffsets, index); /** @ignore */ -const getUtf8 = ({ values, valueOffsets }: Data, index: number): T['TValue'] => { +const getUtf8 = ({ values, valueOffsets }: Data, index: number): T['TValue'] => { const bytes = getVariableWidthBytes(values, valueOffsets, index); return bytes !== null ? decodeUtf8(bytes) : null as any; }; -/** @ignore */ -const getLargeUtf8 = ({ values, valueOffsets }: Data, index: number): T['TValue'] => { - const bytes = getLargeVariableWidthBytes(values, valueOffsets, index); - return bytes !== null ? 
decodeUtf8(bytes) : null as any; -}; /* istanbul ignore next */ /** @ignore */ @@ -344,7 +330,7 @@ GetVisitor.prototype.visitFloat16 = wrapGet(getFloat16); GetVisitor.prototype.visitFloat32 = wrapGet(getNumeric); GetVisitor.prototype.visitFloat64 = wrapGet(getNumeric); GetVisitor.prototype.visitUtf8 = wrapGet(getUtf8); -GetVisitor.prototype.visitLargeUtf8 = wrapGet(getLargeUtf8); +GetVisitor.prototype.visitLargeUtf8 = wrapGet(getUtf8); GetVisitor.prototype.visitBinary = wrapGet(getBinary); GetVisitor.prototype.visitFixedSizeBinary = wrapGet(getFixedSizeBinary); GetVisitor.prototype.visitDate = wrapGet(getDate); diff --git a/js/src/visitor/set.ts b/js/src/visitor/set.ts index a439ec8311fd6..15b0721660f55 100644 --- a/js/src/visitor/set.ts +++ b/js/src/visitor/set.ts @@ -125,16 +125,7 @@ export const setEpochMsToNanosecondsLong = (data: Int32Array, index: number, epo }; /** @ignore */ -export const setVariableWidthBytes = (values: Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => { - if (index + 1 < valueOffsets.length) { - const x = valueOffsets[index]; - const y = valueOffsets[index + 1]; - values.set(value.subarray(0, y - x), x); - } -}; - -/** @ignore */ -export const setLargeVariableWidthBytes = (values: Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => { +export const setVariableWidthBytes = (values: Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => { if (index + 1 < valueOffsets.length) { const x = bigIntToNumber(valueOffsets[index]); const y = bigIntToNumber(valueOffsets[index + 1]); @@ -176,13 +167,9 @@ export const setFixedSizeBinary = ({ stride, values } /** @ignore */ const setBinary = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => setVariableWidthBytes(values, valueOffsets, index, value); /** @ignore */ -const setUtf8 = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => { +const setUtf8 = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => { setVariableWidthBytes(values, valueOffsets, index, encodeUtf8(value)); }; -/** @ignore */ -const setLargeUtf8 = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => { - setLargeVariableWidthBytes(values, valueOffsets, index, encodeUtf8(value)); -}; /* istanbul ignore next */ export const setDate = (data: Data, index: number, value: T['TValue']): void => { @@ -381,7 +368,7 @@ SetVisitor.prototype.visitFloat16 = wrapSet(setFloat16); SetVisitor.prototype.visitFloat32 = wrapSet(setFloat); SetVisitor.prototype.visitFloat64 = wrapSet(setFloat); SetVisitor.prototype.visitUtf8 = wrapSet(setUtf8); -SetVisitor.prototype.visitLargeUtf8 = wrapSet(setLargeUtf8); +SetVisitor.prototype.visitLargeUtf8 = wrapSet(setUtf8); SetVisitor.prototype.visitBinary = wrapSet(setBinary); SetVisitor.prototype.visitFixedSizeBinary = wrapSet(setFixedSizeBinary); SetVisitor.prototype.visitDate = wrapSet(setDate); diff --git a/js/src/visitor/vectorassembler.ts b/js/src/visitor/vectorassembler.ts index 7a9d3bdd57b0d..df820e6f5e00c 100644 --- a/js/src/visitor/vectorassembler.ts +++ b/js/src/visitor/vectorassembler.ts @@ -42,6 +42,7 @@ export interface VectorAssembler extends Visitor { visitInt(data: Data): this; visitFloat(data: Data): this; visitUtf8(data: Data): this; + visitLargeUtf8(data: Data): this; visitBinary(data: Data): this; visitFixedSizeBinary(data: Data): this; visitDate(data: Data): this; @@ -202,29 +203,10 @@ function assembleFlatVector(this: VectorAssembler, data: Data) { - const { length, values, valueOffsets } = data; - const { 
[0]: begin, [length]: end } = valueOffsets; - return _assembleFlatListVector.call(this, length, begin, end, values, valueOffsets); -} - -/** @ignore */ -function assembleLargeFlatListVector(this: VectorAssembler, data: Data) { +function assembleFlatListVector(this: VectorAssembler, data: Data) { const { length, values, valueOffsets } = data; const begin = bigIntToNumber(valueOffsets[0]); const end = bigIntToNumber(valueOffsets[length]); - return _assembleFlatListVector.call(this, length, begin, end, values, valueOffsets); -} - -/** @ignore */ -function _assembleFlatListVector( - this: VectorAssembler, - length: number, - begin: number, - end: number, - values: T['TArray'], - valueOffsets: T['TOffsetArray'] -) { const byteLength = Math.min(end - begin, values.byteLength - begin); // Push in the order FlatList types read their buffers addBuffer.call(this, rebaseValueOffsets(-begin, length + 1, valueOffsets as any)); // valueOffsets buffer first @@ -255,7 +237,7 @@ VectorAssembler.prototype.visitBool = assembleBoolVector; VectorAssembler.prototype.visitInt = assembleFlatVector; VectorAssembler.prototype.visitFloat = assembleFlatVector; VectorAssembler.prototype.visitUtf8 = assembleFlatListVector; -VectorAssembler.prototype.visitLargeUtf8 = assembleLargeFlatListVector; +VectorAssembler.prototype.visitLargeUtf8 = assembleFlatListVector; VectorAssembler.prototype.visitBinary = assembleFlatListVector; VectorAssembler.prototype.visitFixedSizeBinary = assembleFlatVector; VectorAssembler.prototype.visitDate = assembleFlatVector; From 4ec654497bb14e7ec0fbaead655c129ca61074ff Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Sun, 17 Dec 2023 22:27:05 -0500 Subject: [PATCH 057/570] GH-39257: [JS] LargeBinary (#39258) Merge after #39249 * Closes: #39257 --- docs/source/status.rst | 2 +- js/src/Arrow.dom.ts | 4 +- js/src/Arrow.ts | 3 +- js/src/builder.ts | 6 +-- js/src/builder/largebinary.ts | 54 ++++++++++++++++++++++++++ js/src/builder/largeutf8.ts | 22 ++--------- js/src/data.ts | 15 ++++++- js/src/enum.ts | 3 +- js/src/interfaces.ts | 4 ++ js/src/ipc/metadata/json.ts | 3 +- js/src/ipc/metadata/message.ts | 3 +- js/src/ipc/writer.ts | 5 +-- js/src/type.ts | 18 ++++++++- js/src/visitor.ts | 6 ++- js/src/visitor/builderctor.ts | 2 + js/src/visitor/bytelength.ts | 21 +++++----- js/src/visitor/get.ts | 8 ++-- js/src/visitor/indexof.ts | 4 +- js/src/visitor/iterator.ts | 4 +- js/src/visitor/jsontypeassembler.ts | 3 ++ js/src/visitor/jsonvectorassembler.ts | 10 +++-- js/src/visitor/set.ts | 10 ++--- js/src/visitor/typeassembler.ts | 5 +++ js/src/visitor/typecomparator.ts | 4 +- js/src/visitor/typector.ts | 1 + js/src/visitor/vectorassembler.ts | 6 ++- js/src/visitor/vectorloader.ts | 5 ++- js/test/data/tables.ts | 2 +- js/test/generate-test-data.ts | 18 ++++++++- js/test/unit/builders/builder-tests.ts | 1 + js/test/unit/generated-data-tests.ts | 1 + js/test/unit/visitor-tests.ts | 6 ++- 32 files changed, 191 insertions(+), 68 deletions(-) create mode 100644 js/src/builder/largebinary.ts diff --git a/docs/source/status.rst b/docs/source/status.rst index e52e4e4cd49bc..e860aceb76e15 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -62,7 +62,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Binary | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| Large Binary | ✓ | ✓ | ✓ | | | ✓ | ✓ | | +| Large Binary | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | 
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Utf8 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ diff --git a/js/src/Arrow.dom.ts b/js/src/Arrow.dom.ts index 9ec76fdd009f3..cdb4171162f63 100644 --- a/js/src/Arrow.dom.ts +++ b/js/src/Arrow.dom.ts @@ -48,7 +48,7 @@ export { Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64, Float, Float16, Float32, Float64, Utf8, LargeUtf8, - Binary, + Binary, LargeBinary, FixedSizeBinary, Date_, DateDay, DateMillisecond, Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond, @@ -78,7 +78,7 @@ export { } from './Arrow.js'; export { - BinaryBuilder, + BinaryBuilder, LargeBinaryBuilder, BoolBuilder, DateBuilder, DateDayBuilder, DateMillisecondBuilder, DecimalBuilder, diff --git a/js/src/Arrow.ts b/js/src/Arrow.ts index b7e5f63a6ab5a..6251a9e77717b 100644 --- a/js/src/Arrow.ts +++ b/js/src/Arrow.ts @@ -37,7 +37,7 @@ export { Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64, Float, Float16, Float32, Float64, Utf8, LargeUtf8, - Binary, + Binary, LargeBinary, FixedSizeBinary, Date_, DateDay, DateMillisecond, Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond, @@ -80,6 +80,7 @@ export { DurationBuilder, DurationSecondBuilder, DurationMillisecondBuilder, Dur export { Utf8Builder } from './builder/utf8.js'; export { LargeUtf8Builder } from './builder/largeutf8.js'; export { BinaryBuilder } from './builder/binary.js'; +export { LargeBinaryBuilder } from './builder/largebinary.js'; export { ListBuilder } from './builder/list.js'; export { FixedSizeListBuilder } from './builder/fixedsizelist.js'; export { MapBuilder } from './builder/map.js'; diff --git a/js/src/builder.ts b/js/src/builder.ts index 1a4c52f871bbf..a4e2d4d89325c 100644 --- a/js/src/builder.ts +++ b/js/src/builder.ts @@ -22,7 +22,7 @@ import { DataType, strideForType, Float, Int, Decimal, FixedSizeBinary, Date_, Time, Timestamp, Interval, Duration, - Utf8, LargeUtf8, Binary, List, Map_, + Utf8, LargeUtf8, Binary, LargeBinary, List, Map_, } from './type.js'; import { createIsValidFunction } from './builder/valid.js'; import { BufferBuilder, BitmapBufferBuilder, DataBufferBuilder, OffsetsBufferBuilder } from './builder/buffer.js'; @@ -285,7 +285,7 @@ export abstract class Builder { if (typeIds = _typeIds?.flush(length)) { // Unions, DenseUnions valueOffsets = _offsets?.flush(length); - } else if (valueOffsets = _offsets?.flush(length)) { // Variable-width primitives (Binary, Utf8, LargeUtf8), and Lists + } else if (valueOffsets = _offsets?.flush(length)) { // Variable-width primitives (Binary, LargeBinary, Utf8, LargeUtf8), and Lists data = _values?.flush(_offsets.last()); } else { // Fixed-width primitives (Int, Float, Decimal, Time, Timestamp, Duration and Interval) data = _values?.flush(length); @@ -352,7 +352,7 @@ export abstract class FixedWidthBuilder extends Builder { +export abstract class VariableWidthBuilder extends Builder { protected _pendingLength = 0; protected _offsets: OffsetsBufferBuilder; protected _pending: Map | undefined; diff --git a/js/src/builder/largebinary.ts b/js/src/builder/largebinary.ts new file mode 100644 index 0000000000000..59aa7144d20a1 --- /dev/null +++ b/js/src/builder/largebinary.ts @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import { LargeBinary } from '../type.js'; +import { toUint8Array } from '../util/buffer.js'; +import { BufferBuilder } from './buffer.js'; +import { VariableWidthBuilder, BuilderOptions } from '../builder.js'; + +/** @ignore */ +export class LargeBinaryBuilder extends VariableWidthBuilder { + constructor(opts: BuilderOptions) { + super(opts); + this._values = new BufferBuilder(new Uint8Array(0)); + } + public get byteLength(): number { + let size = this._pendingLength + (this.length * 4); + this._offsets && (size += this._offsets.byteLength); + this._values && (size += this._values.byteLength); + this._nulls && (size += this._nulls.byteLength); + return size; + } + public setValue(index: number, value: Uint8Array) { + return super.setValue(index, toUint8Array(value)); + } + protected _flushPending(pending: Map, pendingLength: number) { + const offsets = this._offsets; + const data = this._values.reserve(pendingLength).buffer; + let offset = 0; + for (const [index, value] of pending) { + if (value === undefined) { + offsets.set(index, BigInt(0)); + } else { + const length = value.length; + data.set(value, offset); + offsets.set(index, BigInt(length)); + offset += length; + } + } + } +} diff --git a/js/src/builder/largeutf8.ts b/js/src/builder/largeutf8.ts index fddfeaf8e7b17..51890100095c1 100644 --- a/js/src/builder/largeutf8.ts +++ b/js/src/builder/largeutf8.ts @@ -19,6 +19,7 @@ import { LargeUtf8 } from '../type.js'; import { encodeUtf8 } from '../util/utf8.js'; import { BufferBuilder } from './buffer.js'; import { VariableWidthBuilder, BuilderOptions } from '../builder.js'; +import { LargeBinaryBuilder } from './largebinary.js'; /** @ignore */ export class LargeUtf8Builder extends VariableWidthBuilder { @@ -36,24 +37,9 @@ export class LargeUtf8Builder extends VariableWidthBuilder, pendingLength: number): void { } - protected _flushPending(pending: Map, pendingLength: number) { - const offsets = this._offsets; - const data = this._values.reserve(pendingLength).buffer; - let offset = 0; - for (const [index, value] of pending) { - if (value === undefined) { - offsets.set(index, BigInt(0)); - } else { - const length = value.length; - data.set(value, offset); - offsets.set(index, BigInt(length)); - offset += length; - } - } - } + protected _flushPending(pending: Map, pendingLength: number): void { } } -// (LargeUtf8Builder.prototype as any)._flushPending = (LargeBinaryBuilder.prototype as any)._flushPending; +(LargeUtf8Builder.prototype as any)._flushPending = (LargeBinaryBuilder.prototype as any)._flushPending; diff --git a/js/src/data.ts b/js/src/data.ts index 145ee9d049cb4..6f8792508858b 100644 --- a/js/src/data.ts +++ b/js/src/data.ts @@ -17,7 +17,7 @@ import { Vector } from './vector.js'; import { BufferType, Type, UnionMode } from './enum.js'; -import { DataType, LargeUtf8, strideForType } 
from './type.js'; +import { DataType, strideForType } from './type.js'; import { popcnt_bit_range, truncateBitmap } from './util/bit.js'; // When slicing, we do not know the null count of the sliced range without @@ -253,7 +253,7 @@ export class Data { import { Dictionary, - Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Int, Date_, @@ -324,6 +324,14 @@ class MakeDataVisitor extends Visitor { const { ['length']: length = valueOffsets.length - 1, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0 } = props; return new Data(type, offset, length, nullCount, [valueOffsets, data, nullBitmap]); } + public visitLargeBinary(props: LargeBinaryDataProps) { + const { ['type']: type, ['offset']: offset = 0 } = props; + const data = toUint8Array(props['data']); + const nullBitmap = toUint8Array(props['nullBitmap']); + const valueOffsets = toBigInt64Array(props['valueOffsets']); + const { ['length']: length = valueOffsets.length - 1, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0 } = props; + return new Data(type, offset, length, nullCount, [valueOffsets, data, nullBitmap]); + } public visitFixedSizeBinary(props: FixedSizeBinaryDataProps) { const { ['type']: type, ['offset']: offset = 0 } = props; const nullBitmap = toUint8Array(props['nullBitmap']); @@ -444,6 +452,7 @@ interface IntervalDataProps extends DataProps_ { data?: D interface DurationDataProps extends DataProps_ { data?: DataBuffer } interface FixedSizeBinaryDataProps extends DataProps_ { data?: DataBuffer } interface BinaryDataProps extends DataProps_ { valueOffsets: ValueOffsetsBuffer; data?: DataBuffer } +interface LargeBinaryDataProps extends DataProps_ { valueOffsets: LargeValueOffsetsBuffer | ValueOffsetsBuffer; data?: DataBuffer } interface Utf8DataProps extends DataProps_ { valueOffsets: ValueOffsetsBuffer; data?: DataBuffer } interface LargeUtf8DataProps extends DataProps_ { valueOffsets: LargeValueOffsetsBuffer | ValueOffsetsBuffer; data?: DataBuffer } interface ListDataProps extends DataProps_ { valueOffsets: ValueOffsetsBuffer; child: Data } @@ -468,6 +477,7 @@ export type DataProps = ( T extends Duration /* */ ? DurationDataProps : T extends FixedSizeBinary /* */ ? FixedSizeBinaryDataProps : T extends Binary /* */ ? BinaryDataProps : + T extends LargeBinary /* */ ? LargeBinaryDataProps : T extends Utf8 /* */ ? Utf8DataProps : T extends LargeUtf8 /* */ ? LargeUtf8DataProps : T extends List /* */ ? ListDataProps : @@ -495,6 +505,7 @@ export function makeData(props: IntervalDataProps): Data< export function makeData(props: DurationDataProps): Data; export function makeData(props: FixedSizeBinaryDataProps): Data; export function makeData(props: BinaryDataProps): Data; +export function makeData(props: LargeBinaryDataProps): Data; export function makeData(props: Utf8DataProps): Data; export function makeData(props: LargeUtf8DataProps): Data; export function makeData(props: ListDataProps): Data; diff --git a/js/src/enum.ts b/js/src/enum.ts index 764ea64e63338..0eecc0c68b525 100644 --- a/js/src/enum.ts +++ b/js/src/enum.ts @@ -173,7 +173,8 @@ export enum Type { FixedSizeBinary = 15, /** Fixed-size binary. Each value occupies the same number of bytes */ FixedSizeList = 16, /** Fixed-size list. 
Each value occupies the same number of bytes */ Map = 17, /** Map of named logical types */ - Duration = 18, /** Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds. */ + Duration = 18, /** Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds */ + LargeBinary = 19, /** Large variable-length bytes (no guarantee of UTF8-ness) */ LargeUtf8 = 20, /** Large variable-length string as List */ Dictionary = -1, /** Dictionary aka Category type */ diff --git a/js/src/interfaces.ts b/js/src/interfaces.ts index 707d01bb14cca..c4119a8bd287a 100644 --- a/js/src/interfaces.ts +++ b/js/src/interfaces.ts @@ -35,6 +35,7 @@ import type { DurationBuilder, DurationSecondBuilder, DurationMillisecondBuilder import type { Utf8Builder } from './builder/utf8.js'; import type { LargeUtf8Builder } from './builder/largeutf8.js'; import type { BinaryBuilder } from './builder/binary.js'; +import type { LargeBinaryBuilder } from './builder/largebinary.js'; import type { ListBuilder } from './builder/list.js'; import type { FixedSizeListBuilder } from './builder/fixedsizelist.js'; import type { MapBuilder } from './builder/map.js'; @@ -210,6 +211,7 @@ export type TypeToDataType = { [Type.Utf8]: type.Utf8; [Type.LargeUtf8]: type.LargeUtf8; [Type.Binary]: type.Binary; + [Type.LargeBinary]: type.LargeBinary; [Type.FixedSizeBinary]: type.FixedSizeBinary; [Type.Date]: type.Date_; [Type.DateDay]: type.DateDay; @@ -264,6 +266,7 @@ type TypeToBuilder = { [Type.Utf8]: Utf8Builder; [Type.LargeUtf8]: LargeUtf8Builder; [Type.Binary]: BinaryBuilder; + [Type.LargeBinary]: LargeBinaryBuilder; [Type.FixedSizeBinary]: FixedSizeBinaryBuilder; [Type.Date]: DateBuilder; [Type.DateDay]: DateDayBuilder; @@ -318,6 +321,7 @@ type DataTypeToBuilder = { [Type.Utf8]: T extends type.Utf8 ? Utf8Builder : never; [Type.LargeUtf8]: T extends type.LargeUtf8 ? LargeUtf8Builder : never; [Type.Binary]: T extends type.Binary ? BinaryBuilder : never; + [Type.LargeBinary]: T extends type.LargeBinary ? LargeBinaryBuilder : never; [Type.FixedSizeBinary]: T extends type.FixedSizeBinary ? FixedSizeBinaryBuilder : never; [Type.Date]: T extends type.Date_ ? DateBuilder : never; [Type.DateDay]: T extends type.DateDay ? 
DateDayBuilder : never; diff --git a/js/src/ipc/metadata/json.ts b/js/src/ipc/metadata/json.ts index b669c0c612f8a..8dc81ced3ffd1 100644 --- a/js/src/ipc/metadata/json.ts +++ b/js/src/ipc/metadata/json.ts @@ -20,7 +20,7 @@ import { Schema, Field } from '../../schema.js'; import { DataType, Dictionary, TimeBitWidth, - Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, + Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Union, Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys, Duration, } from '../../type.js'; @@ -149,6 +149,7 @@ function typeFromJSON(f: any, children?: Field[]): DataType { case 'NONE': return new Null(); case 'null': return new Null(); case 'binary': return new Binary(); + case 'largebinary': return new LargeBinary(); case 'utf8': return new Utf8(); case 'largeutf8': return new LargeUtf8(); case 'bool': return new Bool(); diff --git a/js/src/ipc/metadata/message.ts b/js/src/ipc/metadata/message.ts index cf05bff54cfba..552c4d846e863 100644 --- a/js/src/ipc/metadata/message.ts +++ b/js/src/ipc/metadata/message.ts @@ -56,7 +56,7 @@ import ByteBuffer = flatbuffers.ByteBuffer; import { DataType, Dictionary, TimeBitWidth, - Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, + Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Union, Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys, Duration, } from '../../type.js'; @@ -432,6 +432,7 @@ function decodeFieldType(f: _Field, children?: Field[]): DataType { case Type['NONE']: return new Null(); case Type['Null']: return new Null(); case Type['Binary']: return new Binary(); + case Type['LargeBinary']: return new LargeBinary(); case Type['Utf8']: return new Utf8(); case Type['LargeUtf8']: return new LargeUtf8(); case Type['Bool']: return new Bool(); diff --git a/js/src/ipc/writer.ts b/js/src/ipc/writer.ts index 54b4b0249e420..565b0825bd9be 100644 --- a/js/src/ipc/writer.ts +++ b/js/src/ipc/writer.ts @@ -391,7 +391,7 @@ export class RecordBatchJSONWriter extends RecordBatchW protected _writeDictionaryBatch(dictionary: Data, id: number, isDelta = false) { this._dictionaryDeltaOffsets.set(id, dictionary.length + (this._dictionaryDeltaOffsets.get(id) || 0)); this._write(this._dictionaryBlocks.length === 0 ? ` ` : `,\n `); - this._write(`${dictionaryBatchToJSON(dictionary, id, isDelta)}`); + this._write(dictionaryBatchToJSON(dictionary, id, isDelta)); this._dictionaryBlocks.push(new FileBlock(0, 0, 0)); return this; } @@ -401,7 +401,6 @@ export class RecordBatchJSONWriter extends RecordBatchW return this; } public close() { - if (this._dictionaries.length > 0) { this._write(`,\n "dictionaries": [\n`); for (const batch of this._dictionaries) { @@ -413,7 +412,7 @@ export class RecordBatchJSONWriter extends RecordBatchW if (this._recordBatches.length > 0) { for (let i = -1, n = this._recordBatches.length; ++i < n;) { this._write(i === 0 ? 
`,\n "batches": [\n ` : `,\n `); - this._write(`${recordBatchToJSON(this._recordBatches[i])}`); + this._write(recordBatchToJSON(this._recordBatches[i])); this._recordBatchBlocks.push(new FileBlock(0, 0, 0)); } this._write(`\n ]`); diff --git a/js/src/type.ts b/js/src/type.ts index 6223d0316f17a..dea5301aed355 100644 --- a/js/src/type.ts +++ b/js/src/type.ts @@ -58,6 +58,7 @@ export abstract class DataType { })(Binary.prototype); } +/** @ignore */ +export interface LargeBinary extends DataType { TArray: Uint8Array; TOffsetArray: BigInt64Array; TValue: Uint8Array; ArrayType: TypedArrayConstructor; OffsetArrayType: BigIntArrayConstructor } +/** @ignore */ +export class LargeBinary extends DataType { + constructor() { + super(); + } + public get typeId() { return Type.LargeBinary as Type.LargeBinary; } + public toString() { return `LargeBinary`; } + protected static [Symbol.toStringTag] = ((proto: LargeBinary) => { + (proto).ArrayType = Uint8Array; + (proto).OffsetArrayType = BigInt64Array; + return proto[Symbol.toStringTag] = 'LargeBinary'; + })(LargeBinary.prototype); +} + /** @ignore */ export interface Utf8 extends DataType { TArray: Uint8Array; TOffsetArray: Int32Array; TValue: string; ArrayType: TypedArrayConstructor; OffsetArrayType: TypedArrayConstructor } /** @ignore */ @@ -601,7 +618,6 @@ export class FixedSizeBinary extends DataType { protected static [Symbol.toStringTag] = ((proto: FixedSizeBinary) => { (proto).byteWidth = null; (proto).ArrayType = Uint8Array; - (proto).OffsetArrayType = Int32Array; return proto[Symbol.toStringTag] = 'FixedSizeBinary'; })(FixedSizeBinary.prototype); } diff --git a/js/src/visitor.ts b/js/src/visitor.ts index 5b3cc4d3d0593..2fb5e7e14bc22 100644 --- a/js/src/visitor.ts +++ b/js/src/visitor.ts @@ -38,6 +38,7 @@ export abstract class Visitor { public visitUtf8(_node: any, ..._args: any[]): any { return null; } public visitLargeUtf8(_node: any, ..._args: any[]): any { return null; } public visitBinary(_node: any, ..._args: any[]): any { return null; } + public visitLargeBinary(_node: any, ..._args: any[]): any { return null; } public visitFixedSizeBinary(_node: any, ..._args: any[]): any { return null; } public visitDate(_node: any, ..._args: any[]): any { return null; } public visitTimestamp(_node: any, ..._args: any[]): any { return null; } @@ -48,7 +49,7 @@ export abstract class Visitor { public visitUnion(_node: any, ..._args: any[]): any { return null; } public visitDictionary(_node: any, ..._args: any[]): any { return null; } public visitInterval(_node: any, ..._args: any[]): any { return null; } - public visitDuration(_node: any, ... 
_args: any[]): any { return null; } + public visitDuration(_node: any, ..._args: any[]): any { return null; } public visitFixedSizeList(_node: any, ..._args: any[]): any { return null; } public visitMap(_node: any, ..._args: any[]): any { return null; } } @@ -92,6 +93,7 @@ function getVisitFnByTypeId(visitor: Visitor, dtype: Type, throwIfNotFound = tru case Type.Utf8: fn = visitor.visitUtf8; break; case Type.LargeUtf8: fn = visitor.visitLargeUtf8; break; case Type.Binary: fn = visitor.visitBinary; break; + case Type.LargeBinary: fn = visitor.visitLargeBinary; break; case Type.FixedSizeBinary: fn = visitor.visitFixedSizeBinary; break; case Type.Date: fn = visitor.visitDate; break; case Type.DateDay: fn = visitor.visitDateDay || visitor.visitDate; break; @@ -153,6 +155,7 @@ function inferDType(type: T): Type { // @ts-ignore return Type.Float; case Type.Binary: return Type.Binary; + case Type.LargeBinary: return Type.LargeBinary; case Type.Utf8: return Type.Utf8; case Type.LargeUtf8: return Type.LargeUtf8; case Type.Bool: return Type.Bool; @@ -234,6 +237,7 @@ export interface Visitor { visitUtf8(node: any, ...args: any[]): any; visitLargeUtf8(node: any, ...args: any[]): any; visitBinary(node: any, ...args: any[]): any; + visitLargeBinary(node: any, ...args: any[]): any; visitFixedSizeBinary(node: any, ...args: any[]): any; visitDate(node: any, ...args: any[]): any; visitDateDay?(node: any, ...args: any[]): any; diff --git a/js/src/visitor/builderctor.ts b/js/src/visitor/builderctor.ts index 83374712b2642..5b3758c4e0cbc 100644 --- a/js/src/visitor/builderctor.ts +++ b/js/src/visitor/builderctor.ts @@ -22,6 +22,7 @@ import { DataType } from '../type.js'; import { Visitor } from '../visitor.js'; import { BuilderCtor } from '../interfaces.js'; import { BinaryBuilder } from '../builder/binary.js'; +import { LargeBinaryBuilder } from '../builder/largebinary.js'; import { BoolBuilder } from '../builder/bool.js'; import { DateBuilder, DateDayBuilder, DateMillisecondBuilder } from '../builder/date.js'; import { DecimalBuilder } from '../builder/decimal.js'; @@ -70,6 +71,7 @@ export class GetBuilderCtor extends Visitor { public visitUtf8() { return Utf8Builder; } public visitLargeUtf8() { return LargeUtf8Builder; } public visitBinary() { return BinaryBuilder; } + public visitLargeBinary() { return LargeBinaryBuilder; } public visitFixedSizeBinary() { return FixedSizeBinaryBuilder; } public visitDate() { return DateBuilder; } public visitDateDay() { return DateDayBuilder; } diff --git a/js/src/visitor/bytelength.ts b/js/src/visitor/bytelength.ts index c3bfadd50e155..43399b2571fe2 100644 --- a/js/src/visitor/bytelength.ts +++ b/js/src/visitor/bytelength.ts @@ -26,9 +26,10 @@ import { Type, TimeUnit, UnionMode } from '../enum.js'; import { DataType, Dictionary, Float, Int, Date_, Interval, Time, Timestamp, Duration, - Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Union, DenseUnion, SparseUnion, } from '../type.js'; +import { bigIntToNumber } from '../util/bigint.js'; /** @ignore */ const sum = (x: number, y: number) => x + y; @@ -39,6 +40,7 @@ export interface GetByteLengthVisitor extends Visitor { getVisitFn(node: Data | T): (data: Data, index: number) => number; getVisitFn(node: T): (data: Data>, index: number) => number; visitBinary(data: Data, index: number): number; + visitLargeBinary(data: Data, index: number): number; visitUtf8(data: Data, index: number): number; 
visitLargeUtf8(data: Data, index: number): number; visitList(data: Data, index: number): number; @@ -95,22 +97,15 @@ export class GetByteLengthVisitor extends Visitor { } /** @ignore */ -const getUtf8ByteLength = ({ valueOffsets }: Data, index: number): number => { +const getBinaryByteLength = ({ valueOffsets }: Data, index: number): number => { // 4 + 4 for the indices, `end - start` for the data bytes - return 8 + (valueOffsets[index + 1] - valueOffsets[index]); -}; - -/** @ignore */ -const getBinaryByteLength = ({ valueOffsets }: Data, index: number): number => { - // 4 + 4 for the indices, `end - start` for the data bytes - return 8 + (valueOffsets[index + 1] - valueOffsets[index]); + return 8 + bigIntToNumber(valueOffsets[index + 1]) - bigIntToNumber(valueOffsets[index]); }; /** @ignore */ const getListByteLength = ({ valueOffsets, stride, children }: Data, index: number): number => { const child: Data = children[0]; - const { [index * stride]: start } = valueOffsets; - const { [index * stride + 1]: end } = valueOffsets; + const { [index * stride]: start, [index * stride + 1]: end } = valueOffsets; const visit = instance.getVisitFn(child.type); const slice = child.slice(start, end - start); let size = 8; // 4 + 4 for the indices @@ -155,8 +150,10 @@ const getSparseUnionByteLength = ({ children }: Data, return 4 + instance.visitMany(children, children.map(() => index)).reduce(sum, 0); }; -GetByteLengthVisitor.prototype.visitUtf8 = getUtf8ByteLength; +GetByteLengthVisitor.prototype.visitUtf8 = getBinaryByteLength; +GetByteLengthVisitor.prototype.visitLargeUtf8 = getBinaryByteLength; GetByteLengthVisitor.prototype.visitBinary = getBinaryByteLength; +GetByteLengthVisitor.prototype.visitLargeBinary = getBinaryByteLength; GetByteLengthVisitor.prototype.visitList = getListByteLength; GetByteLengthVisitor.prototype.visitFixedSizeList = getFixedSizeListByteLength; GetByteLengthVisitor.prototype.visitUnion = getUnionByteLength; diff --git a/js/src/visitor/get.ts b/js/src/visitor/get.ts index 112d2f2983e53..3ab3bcb68c386 100644 --- a/js/src/visitor/get.ts +++ b/js/src/visitor/get.ts @@ -28,7 +28,7 @@ import { uint16ToFloat64 } from '../util/math.js'; import { Type, UnionMode, Precision, DateUnit, TimeUnit, IntervalUnit } from '../enum.js'; import { DataType, Dictionary, - Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, @@ -36,7 +36,7 @@ import { Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond, Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond, Duration, DurationSecond, DurationMillisecond, DurationMicrosecond, DurationNanosecond, - Union, DenseUnion, SparseUnion, LargeUtf8, + Union, DenseUnion, SparseUnion, } from '../type.js'; /** @ignore */ @@ -63,6 +63,7 @@ export interface GetVisitor extends Visitor { visitUtf8(data: Data, index: number): T['TValue'] | null; visitLargeUtf8(data: Data, index: number): T['TValue'] | null; visitBinary(data: Data, index: number): T['TValue'] | null; + visitLargeBinary(data: Data, index: number): T['TValue'] | null; visitFixedSizeBinary(data: Data, index: number): T['TValue'] | null; visitDate(data: Data, index: number): T['TValue'] | null; visitDateDay(data: Data, index: number): T['TValue'] | null; @@ -151,7 +152,7 @@ const 
getBigInts = ({ values }: Data, index: number): T[ const getFixedSizeBinary = ({ stride, values }: Data, index: number): T['TValue'] => values.subarray(stride * index, stride * (index + 1)); /** @ignore */ -const getBinary = ({ values, valueOffsets }: Data, index: number): T['TValue'] => getVariableWidthBytes(values, valueOffsets, index); +const getBinary = ({ values, valueOffsets }: Data, index: number): T['TValue'] => getVariableWidthBytes(values, valueOffsets, index); /** @ignore */ const getUtf8 = ({ values, valueOffsets }: Data, index: number): T['TValue'] => { const bytes = getVariableWidthBytes(values, valueOffsets, index); @@ -332,6 +333,7 @@ GetVisitor.prototype.visitFloat64 = wrapGet(getNumeric); GetVisitor.prototype.visitUtf8 = wrapGet(getUtf8); GetVisitor.prototype.visitLargeUtf8 = wrapGet(getUtf8); GetVisitor.prototype.visitBinary = wrapGet(getBinary); +GetVisitor.prototype.visitLargeBinary = wrapGet(getBinary); GetVisitor.prototype.visitFixedSizeBinary = wrapGet(getFixedSizeBinary); GetVisitor.prototype.visitDate = wrapGet(getDate); GetVisitor.prototype.visitDateDay = wrapGet(getDateDay); diff --git a/js/src/visitor/indexof.ts b/js/src/visitor/indexof.ts index 76f95788c7953..1e1cb87a9840e 100644 --- a/js/src/visitor/indexof.ts +++ b/js/src/visitor/indexof.ts @@ -24,7 +24,7 @@ import { getBool, BitIterator } from '../util/bit.js'; import { createElementComparator } from '../util/vector.js'; import { DataType, Dictionary, - Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, @@ -59,6 +59,7 @@ export interface IndexOfVisitor extends Visitor { visitUtf8(data: Data, value: T['TValue'] | null, index?: number): number; visitLargeUtf8(data: Data, value: T['TValue'] | null, index?: number): number; visitBinary(data: Data, value: T['TValue'] | null, index?: number): number; + visitLargeBinary(data: Data, value: T['TValue'] | null, index?: number): number; visitFixedSizeBinary(data: Data, value: T['TValue'] | null, index?: number): number; visitDate(data: Data, value: T['TValue'] | null, index?: number): number; visitDateDay(data: Data, value: T['TValue'] | null, index?: number): number; @@ -175,6 +176,7 @@ IndexOfVisitor.prototype.visitFloat64 = indexOfValue; IndexOfVisitor.prototype.visitUtf8 = indexOfValue; IndexOfVisitor.prototype.visitLargeUtf8 = indexOfValue; IndexOfVisitor.prototype.visitBinary = indexOfValue; +IndexOfVisitor.prototype.visitLargeBinary = indexOfValue; IndexOfVisitor.prototype.visitFixedSizeBinary = indexOfValue; IndexOfVisitor.prototype.visitDate = indexOfValue; IndexOfVisitor.prototype.visitDateDay = indexOfValue; diff --git a/js/src/visitor/iterator.ts b/js/src/visitor/iterator.ts index 09dfcb0b565ae..bf7e9d1591b40 100644 --- a/js/src/visitor/iterator.ts +++ b/js/src/visitor/iterator.ts @@ -21,7 +21,7 @@ import { Type, Precision } from '../enum.js'; import { TypeToDataType } from '../interfaces.js'; import { DataType, Dictionary, - Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, @@ -57,6 
+57,7 @@ export interface IteratorVisitor extends Visitor { visitUtf8(vector: Vector): IterableIterator; visitLargeUtf8(vector: Vector): IterableIterator; visitBinary(vector: Vector): IterableIterator; + visitLargeBinary(vector: Vector): IterableIterator; visitFixedSizeBinary(vector: Vector): IterableIterator; visitDate(vector: Vector): IterableIterator; visitDateDay(vector: Vector): IterableIterator; @@ -161,6 +162,7 @@ IteratorVisitor.prototype.visitFloat64 = vectorIterator; IteratorVisitor.prototype.visitUtf8 = vectorIterator; IteratorVisitor.prototype.visitLargeUtf8 = vectorIterator; IteratorVisitor.prototype.visitBinary = vectorIterator; +IteratorVisitor.prototype.visitLargeBinary = vectorIterator; IteratorVisitor.prototype.visitFixedSizeBinary = vectorIterator; IteratorVisitor.prototype.visitDate = vectorIterator; IteratorVisitor.prototype.visitDateDay = vectorIterator; diff --git a/js/src/visitor/jsontypeassembler.ts b/js/src/visitor/jsontypeassembler.ts index a6746a858ecb4..823b1dea104c8 100644 --- a/js/src/visitor/jsontypeassembler.ts +++ b/js/src/visitor/jsontypeassembler.ts @@ -42,6 +42,9 @@ export class JSONTypeAssembler extends Visitor { public visitBinary({ typeId }: T) { return { 'name': ArrowType[typeId].toLowerCase() }; } + public visitLargeBinary({ typeId }: T) { + return { 'name': ArrowType[typeId].toLowerCase() }; + } public visitBool({ typeId }: T) { return { 'name': ArrowType[typeId].toLowerCase() }; } diff --git a/js/src/visitor/jsonvectorassembler.ts b/js/src/visitor/jsonvectorassembler.ts index 9a3cb8601a434..88699d8f168c2 100644 --- a/js/src/visitor/jsonvectorassembler.ts +++ b/js/src/visitor/jsonvectorassembler.ts @@ -27,7 +27,7 @@ import { BitIterator, getBit, getBool } from '../util/bit.js'; import { DataType, Float, Int, Date_, Interval, Time, Timestamp, Union, Duration, - Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, IntArray, LargeUtf8, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, IntArray, } from '../type.js'; /** @ignore */ @@ -44,6 +44,7 @@ export interface JSONVectorAssembler extends Visitor { visitUtf8(data: Data): { DATA: string[]; OFFSET: number[] }; visitLargeUtf8(data: Data): { DATA: string[]; OFFSET: string[] }; visitBinary(data: Data): { DATA: string[]; OFFSET: number[] }; + visitLargeBinary(data: Data): { DATA: string[]; OFFSET: string[] }; visitFixedSizeBinary(data: Data): { DATA: string[] }; visitDate(data: Data): { DATA: number[] }; visitTimestamp(data: Data): { DATA: string[] }; @@ -105,7 +106,10 @@ export class JSONVectorAssembler extends Visitor { return { 'DATA': [...new Vector([data])], 'OFFSET': [...bigNumsToStrings(data.valueOffsets, 2)] }; } public visitBinary(data: Data) { - return { 'DATA': [...binaryToString(new Vector([data]))], OFFSET: [...data.valueOffsets] }; + return { 'DATA': [...binaryToString(new Vector([data]))], 'OFFSET': [...data.valueOffsets] }; + } + public visitLargeBinary(data: Data) { + return { 'DATA': [...binaryToString(new Vector([data]))], 'OFFSET': [...bigNumsToStrings(data.valueOffsets, 2)] }; } public visitFixedSizeBinary(data: Data) { return { 'DATA': [...binaryToString(new Vector([data]))] }; @@ -168,7 +172,7 @@ export class JSONVectorAssembler extends Visitor { } /** @ignore */ -function* binaryToString(vector: Vector | Vector) { +function* binaryToString(vector: Vector | Vector | Vector) { for (const octets of vector as Iterable) { yield octets.reduce((str, byte) => { return `${str}${('0' + (byte & 
0xFF).toString(16)).slice(-2)}`; diff --git a/js/src/visitor/set.ts b/js/src/visitor/set.ts index 15b0721660f55..eb1f280964c8e 100644 --- a/js/src/visitor/set.ts +++ b/js/src/visitor/set.ts @@ -26,7 +26,7 @@ import { float64ToUint16 } from '../util/math.js'; import { Type, UnionMode, Precision, DateUnit, TimeUnit, IntervalUnit } from '../enum.js'; import { DataType, Dictionary, - Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, @@ -61,6 +61,7 @@ export interface SetVisitor extends Visitor { visitUtf8(data: Data, index: number, value: T['TValue']): void; visitLargeUtf8(data: Data, index: number, value: T['TValue']): void; visitBinary(data: Data, index: number, value: T['TValue']): void; + visitLargeBinary(data: Data, index: number, value: T['TValue']): void; visitFixedSizeBinary(data: Data, index: number, value: T['TValue']): void; visitDate(data: Data, index: number, value: T['TValue']): void; visitDateDay(data: Data, index: number, value: T['TValue']): void; @@ -165,11 +166,9 @@ export const setDateMillisecond = ({ values }: Data({ stride, values }: Data, index: number, value: T['TValue']): void => { values.set(value.subarray(0, stride), stride * index); }; /** @ignore */ -const setBinary = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => setVariableWidthBytes(values, valueOffsets, index, value); +const setBinary = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => setVariableWidthBytes(values, valueOffsets, index, value); /** @ignore */ -const setUtf8 = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => { - setVariableWidthBytes(values, valueOffsets, index, encodeUtf8(value)); -}; +const setUtf8 = ({ values, valueOffsets }: Data, index: number, value: T['TValue']) => setVariableWidthBytes(values, valueOffsets, index, encodeUtf8(value)); /* istanbul ignore next */ export const setDate = (data: Data, index: number, value: T['TValue']): void => { @@ -370,6 +369,7 @@ SetVisitor.prototype.visitFloat64 = wrapSet(setFloat); SetVisitor.prototype.visitUtf8 = wrapSet(setUtf8); SetVisitor.prototype.visitLargeUtf8 = wrapSet(setUtf8); SetVisitor.prototype.visitBinary = wrapSet(setBinary); +SetVisitor.prototype.visitLargeBinary = wrapSet(setBinary); SetVisitor.prototype.visitFixedSizeBinary = wrapSet(setFixedSizeBinary); SetVisitor.prototype.visitDate = wrapSet(setDate); SetVisitor.prototype.visitDateDay = wrapSet(setDateDay); diff --git a/js/src/visitor/typeassembler.ts b/js/src/visitor/typeassembler.ts index f072714222739..169f3627a4002 100644 --- a/js/src/visitor/typeassembler.ts +++ b/js/src/visitor/typeassembler.ts @@ -25,6 +25,7 @@ import { Null } from '../fb/null.js'; import { Int } from '../fb/int.js'; import { FloatingPoint } from '../fb/floating-point.js'; import { Binary } from '../fb/binary.js'; +import { LargeBinary } from '../fb/large-binary.js'; import { Bool } from '../fb/bool.js'; import { Utf8 } from '../fb/utf8.js'; import { LargeUtf8 } from '../fb/large-utf8.js'; @@ -71,6 +72,10 @@ export class TypeAssembler extends Visitor { Binary.startBinary(b); return Binary.endBinary(b); } + public visitLargeBinary(_node: T, b: Builder) { + LargeBinary.startLargeBinary(b); + return LargeBinary.endLargeBinary(b); + } public visitBool(_node: T, b: 
Builder) { Bool.startBool(b); return Bool.endBool(b); diff --git a/js/src/visitor/typecomparator.ts b/js/src/visitor/typecomparator.ts index 2417dec09c6e9..a113f2ea31e8d 100644 --- a/js/src/visitor/typecomparator.ts +++ b/js/src/visitor/typecomparator.ts @@ -21,7 +21,7 @@ import { Visitor } from '../visitor.js'; import { Schema, Field } from '../schema.js'; import { DataType, TypeMap, Dictionary, - Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, @@ -55,6 +55,7 @@ export interface TypeComparator extends Visitor { visitUtf8(type: T, other?: DataType | null): other is T; visitLargeUtf8(type: T, other?: DataType | null): other is T; visitBinary(type: T, other?: DataType | null): other is T; + visitLargeBinary(type: T, other?: DataType | null): other is T; visitFixedSizeBinary(type: T, other?: DataType | null): other is T; visitDate(type: T, other?: DataType | null): other is T; visitDateDay(type: T, other?: DataType | null): other is T; @@ -252,6 +253,7 @@ TypeComparator.prototype.visitFloat64 = compareFloat; TypeComparator.prototype.visitUtf8 = compareAny; TypeComparator.prototype.visitLargeUtf8 = compareAny; TypeComparator.prototype.visitBinary = compareAny; +TypeComparator.prototype.visitLargeBinary = compareAny; TypeComparator.prototype.visitFixedSizeBinary = compareFixedSizeBinary; TypeComparator.prototype.visitDate = compareDate; TypeComparator.prototype.visitDateDay = compareDate; diff --git a/js/src/visitor/typector.ts b/js/src/visitor/typector.ts index 2e0bbc4147abb..a781b5fb14fcc 100644 --- a/js/src/visitor/typector.ts +++ b/js/src/visitor/typector.ts @@ -51,6 +51,7 @@ export class GetDataTypeConstructor extends Visitor { public visitUtf8() { return type.Utf8; } public visitLargeUtf8() { return type.LargeUtf8; } public visitBinary() { return type.Binary; } + public visitLargeBinary() { return type.LargeBinary; } public visitFixedSizeBinary() { return type.FixedSizeBinary; } public visitDate() { return type.Date_; } public visitDateDay() { return type.DateDay; } diff --git a/js/src/visitor/vectorassembler.ts b/js/src/visitor/vectorassembler.ts index df820e6f5e00c..7dc3695582dd7 100644 --- a/js/src/visitor/vectorassembler.ts +++ b/js/src/visitor/vectorassembler.ts @@ -27,7 +27,7 @@ import { BufferRegion, FieldNode } from '../ipc/metadata/message.js'; import { DataType, Dictionary, Float, Int, Date_, Interval, Time, Timestamp, Union, Duration, - Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, LargeUtf8, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, } from '../type.js'; import { bigIntToNumber } from '../util/bigint.js'; @@ -44,6 +44,7 @@ export interface VectorAssembler extends Visitor { visitUtf8(data: Data): this; visitLargeUtf8(data: Data): this; visitBinary(data: Data): this; + visitLargeBinary(data: Data): this; visitFixedSizeBinary(data: Data): this; visitDate(data: Data): this; visitTimestamp(data: Data): this; @@ -203,7 +204,7 @@ function assembleFlatVector(this: VectorAssembler, data: Data) { +function assembleFlatListVector(this: VectorAssembler, data: Data) { const { length, values, valueOffsets } = data; const begin = bigIntToNumber(valueOffsets[0]); const end = 
bigIntToNumber(valueOffsets[length]); @@ -239,6 +240,7 @@ VectorAssembler.prototype.visitFloat = assembleFlatVector; VectorAssembler.prototype.visitUtf8 = assembleFlatListVector; VectorAssembler.prototype.visitLargeUtf8 = assembleFlatListVector; VectorAssembler.prototype.visitBinary = assembleFlatListVector; +VectorAssembler.prototype.visitLargeBinary = assembleFlatListVector; VectorAssembler.prototype.visitFixedSizeBinary = assembleFlatVector; VectorAssembler.prototype.visitDate = assembleFlatVector; VectorAssembler.prototype.visitTimestamp = assembleFlatVector; diff --git a/js/src/visitor/vectorloader.ts b/js/src/visitor/vectorloader.ts index 35f28f49baada..c9c016d6b463c 100644 --- a/js/src/visitor/vectorloader.ts +++ b/js/src/visitor/vectorloader.ts @@ -77,6 +77,9 @@ export class VectorLoader extends Visitor { public visitBinary(type: T, { length, nullCount } = this.nextFieldNode()) { return makeData({ type, length, nullCount, nullBitmap: this.readNullBitmap(type, nullCount), valueOffsets: this.readOffsets(type), data: this.readData(type) }); } + public visitLargeBinary(type: T, { length, nullCount } = this.nextFieldNode()) { + return makeData({ type, length, nullCount, nullBitmap: this.readNullBitmap(type, nullCount), valueOffsets: this.readOffsets(type), data: this.readData(type) }); + } public visitFixedSizeBinary(type: T, { length, nullCount } = this.nextFieldNode()) { return makeData({ type, length, nullCount, nullBitmap: this.readNullBitmap(type, nullCount), data: this.readData(type) }); } @@ -169,7 +172,7 @@ export class JSONVectorLoader extends VectorLoader { return toArrayBufferView(Uint8Array, Int64.convertArray(sources[offset] as string[])); } else if (DataType.isDecimal(type)) { return toArrayBufferView(Uint8Array, Int128.convertArray(sources[offset] as string[])); - } else if (DataType.isBinary(type) || DataType.isFixedSizeBinary(type)) { + } else if (DataType.isBinary(type) || DataType.isLargeBinary(type) || DataType.isFixedSizeBinary(type)) { return binaryDataFromJSON(sources[offset] as string[]); } else if (DataType.isBool(type)) { return packBools(sources[offset] as number[]); diff --git a/js/test/data/tables.ts b/js/test/data/tables.ts index 449cfe1fb853a..89cf93eab585b 100644 --- a/js/test/data/tables.ts +++ b/js/test/data/tables.ts @@ -27,7 +27,7 @@ const nestedVectorGeneratorNames = ['struct', 'denseUnion', 'sparseUnion', 'map' const dictionaryKeyGeneratorNames = ['int8', 'int16', 'int32', 'uint8', 'uint16', 'uint32']; const valueVectorGeneratorNames = [ 'null_', 'bool', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64', - 'float16', 'float32', 'float64', 'utf8', 'largeUtf8', 'binary', 'fixedSizeBinary', 'dateDay', 'dateMillisecond', + 'float16', 'float32', 'float64', 'utf8', 'largeUtf8', 'binary', 'largeBinary', 'fixedSizeBinary', 'dateDay', 'dateMillisecond', 'timestampSecond', 'timestampMillisecond', 'timestampMicrosecond', 'timestampNanosecond', 'timeSecond', 'timeMillisecond', 'timeMicrosecond', 'timeNanosecond', 'decimal', 'dictionary', 'intervalDayTime', 'intervalYearMonth', diff --git a/js/test/generate-test-data.ts b/js/test/generate-test-data.ts index 9d7b038331fe6..be248ad2c6ed8 100644 --- a/js/test/generate-test-data.ts +++ b/js/test/generate-test-data.ts @@ -25,7 +25,7 @@ import { Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64, Float, Float16, Float32, Float64, Utf8, LargeUtf8, - Binary, + Binary, LargeBinary, FixedSizeBinary, Date_, DateDay, DateMillisecond, Timestamp, TimestampSecond, TimestampMillisecond, 
TimestampMicrosecond, TimestampNanosecond, @@ -54,6 +54,7 @@ interface TestDataVectorGenerator extends Visitor { visit(type: T, length?: number, nullCount?: number): GeneratedVector; visit(type: T, length?: number, nullCount?: number): GeneratedVector; visit(type: T, length?: number, nullCount?: number): GeneratedVector; + visit(type: T, length?: number, nullCount?: number): GeneratedVector; visit(type: T, length?: number, nullCount?: number): GeneratedVector; visit(type: T, length?: number, nullCount?: number): GeneratedVector; visit(type: T, length?: number, nullCount?: number): GeneratedVector; @@ -78,6 +79,7 @@ interface TestDataVectorGenerator extends Visitor { visitUtf8: typeof generateUtf8; visitLargeUtf8: typeof generateLargeUtf8; visitBinary: typeof generateBinary; + visitLargeBinary: typeof generateLargeBinary; visitFixedSizeBinary: typeof generateFixedSizeBinary; visitDate: typeof generateDate; visitTimestamp: typeof generateTimestamp; @@ -104,6 +106,7 @@ TestDataVectorGenerator.prototype.visitFloat = generateFloat; TestDataVectorGenerator.prototype.visitUtf8 = generateUtf8; TestDataVectorGenerator.prototype.visitLargeUtf8 = generateLargeUtf8; TestDataVectorGenerator.prototype.visitBinary = generateBinary; +TestDataVectorGenerator.prototype.visitLargeBinary = generateLargeBinary; TestDataVectorGenerator.prototype.visitFixedSizeBinary = generateFixedSizeBinary; TestDataVectorGenerator.prototype.visitDate = generateDate; TestDataVectorGenerator.prototype.visitTimestamp = generateTimestamp; @@ -219,6 +222,7 @@ export const float64 = (length = 100, nullCount = Math.trunc(length * 0.2)) => v export const utf8 = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new Utf8(), length, nullCount); export const largeUtf8 = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new LargeUtf8(), length, nullCount); export const binary = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new Binary(), length, nullCount); +export const largeBinary = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new LargeBinary(), length, nullCount); export const fixedSizeBinary = (length = 100, nullCount = Math.trunc(length * 0.2), byteWidth = 8) => vectorGenerator.visit(new FixedSizeBinary(byteWidth), length, nullCount); export const dateDay = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new DateDay(), length, nullCount); export const dateMillisecond = (length = 100, nullCount = Math.trunc(length * 0.2)) => vectorGenerator.visit(new DateMillisecond(), length, nullCount); @@ -246,7 +250,7 @@ export const fixedSizeList = (length = 100, nullCount = Math.trunc(length * 0.2) export const map = (length = 100, nullCount = Math.trunc(length * 0.2), child: Field> = defaultMapChild()) => vectorGenerator.visit(new Map_(child), length, nullCount); export const vecs = { - null_, bool, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32, float64, utf8, largeUtf8, binary, fixedSizeBinary, dateDay, dateMillisecond, timestampSecond, timestampMillisecond, timestampMicrosecond, timestampNanosecond, timeSecond, timeMillisecond, timeMicrosecond, timeNanosecond, decimal, list, struct, denseUnion, sparseUnion, dictionary, intervalDayTime, intervalYearMonth, fixedSizeList, map, durationSecond, durationMillisecond, durationMicrosecond, durationNanosecond + null_, bool, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32, float64, utf8, largeUtf8, 
binary, largeBinary, fixedSizeBinary, dateDay, dateMillisecond, timestampSecond, timestampMillisecond, timestampMicrosecond, timestampNanosecond, timeSecond, timeMillisecond, timeMicrosecond, timeNanosecond, decimal, list, struct, denseUnion, sparseUnion, dictionary, intervalDayTime, intervalYearMonth, fixedSizeList, map, durationSecond, durationMillisecond, durationMicrosecond, durationNanosecond } as { [k: string]: (...args: any[]) => any }; function generateNull(this: TestDataVectorGenerator, type: T, length = 100): GeneratedVector { @@ -368,6 +372,16 @@ function generateBinary(this: TestDataVectorGenerator, type: T return { values: () => values, vector: new Vector([makeData({ type, length, nullCount, nullBitmap, valueOffsets, data })]) }; } +function generateLargeBinary(this: TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length * 0.2)): GeneratedVector { + const nullBitmap = createBitmap(length, nullCount); + const valueOffsets = createVariableWidthOffsets64(length, nullBitmap, 10, 20, nullCount != 0); + const values = [...valueOffsets.slice(1)] + .map((o, i) => isValid(nullBitmap, i) ? o - valueOffsets[i] : null) + .map((length) => length == null ? null : randomBytes(Number(length))); + const data = createVariableWidthBytes(length, nullBitmap, valueOffsets, (i) => values[i]!); + return { values: () => values, vector: new Vector([makeData({ type, length, nullCount, nullBitmap, valueOffsets, data })]) }; +} + function generateFixedSizeBinary(this: TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length * 0.2)): GeneratedVector { const nullBitmap = createBitmap(length, nullCount); const data = fillRandom(Uint8Array, length * type.byteWidth); diff --git a/js/test/unit/builders/builder-tests.ts b/js/test/unit/builders/builder-tests.ts index 0137c7aa66635..4d1be9b225b08 100644 --- a/js/test/unit/builders/builder-tests.ts +++ b/js/test/unit/builders/builder-tests.ts @@ -46,6 +46,7 @@ describe('Generated Test Data', () => { describe('Utf8Builder', () => { validateBuilder(generate.utf8); }); describe('LargeUtf8Builder', () => { validateBuilder(generate.largeUtf8); }); describe('BinaryBuilder', () => { validateBuilder(generate.binary); }); + describe('LargeBinaryBuilder', () => { validateBuilder(generate.largeBinary); }); describe('FixedSizeBinaryBuilder', () => { validateBuilder(generate.fixedSizeBinary); }); describe('DateDayBuilder', () => { validateBuilder(generate.dateDay); }); describe('DateMillisecondBuilder', () => { validateBuilder(generate.dateMillisecond); }); diff --git a/js/test/unit/generated-data-tests.ts b/js/test/unit/generated-data-tests.ts index 0a06bcbab8ee0..1e26e74730a2d 100644 --- a/js/test/unit/generated-data-tests.ts +++ b/js/test/unit/generated-data-tests.ts @@ -40,6 +40,7 @@ describe('Generated Test Data', () => { describe('Utf8', () => { validateVector(generate.utf8()); }); describe('LargeUtf8', () => { validateVector(generate.largeUtf8()); }); describe('Binary', () => { validateVector(generate.binary()); }); + describe('LargeBinary', () => { validateVector(generate.largeBinary()); }); describe('FixedSizeBinary', () => { validateVector(generate.fixedSizeBinary()); }); describe('DateDay', () => { validateVector(generate.dateDay()); }); describe('DateMillisecond', () => { validateVector(generate.dateMillisecond()); }); diff --git a/js/test/unit/visitor-tests.ts b/js/test/unit/visitor-tests.ts index f78adc59f8e98..6ecb6cca33ed5 100644 --- a/js/test/unit/visitor-tests.ts +++ b/js/test/unit/visitor-tests.ts @@ -18,7 +18,7 @@ 
import { Field, Visitor, DataType, Dictionary, - Bool, Null, Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, + Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct, Float, Float16, Float32, Float64, Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Date_, DateDay, DateMillisecond, @@ -38,6 +38,7 @@ class BasicVisitor extends Visitor { public visitUtf8(type: T) { return (this.type = type); } public visitLargeUtf8(type: T) { return (this.type = type); } public visitBinary(type: T) { return (this.type = type); } + public visitLargeBinary(type: T) { return (this.type = type); } public visitFixedSizeBinary(type: T) { return (this.type = type); } public visitDate(type: T) { return (this.type = type); } public visitTimestamp(type: T) { return (this.type = type); } @@ -71,6 +72,7 @@ class FeatureVisitor extends Visitor { public visitUtf8(type: T) { return (this.type = type); } public visitLargeUtf8(type: T) { return (this.type = type); } public visitBinary(type: T) { return (this.type = type); } + public visitLargeBinary(type: T) { return (this.type = type); } public visitFixedSizeBinary(type: T) { return (this.type = type); } public visitDateDay(type: T) { return (this.type = type); } public visitDateMillisecond(type: T) { return (this.type = type); } @@ -108,6 +110,7 @@ describe('Visitor', () => { test(`visits Utf8 types`, () => validateBasicVisitor(new Utf8())); test(`visits LargeUtf8 types`, () => validateBasicVisitor(new LargeUtf8())); test(`visits Binary types`, () => validateBasicVisitor(new Binary())); + test(`visits LargeBinary types`, () => validateBasicVisitor(new LargeBinary())); test(`visits FixedSizeBinary types`, () => validateBasicVisitor(new FixedSizeBinary(128))); test(`visits Date types`, () => validateBasicVisitor(new Date_(0))); test(`visits Timestamp types`, () => validateBasicVisitor(new Timestamp(0, 'UTC'))); @@ -149,6 +152,7 @@ describe('Visitor', () => { test(`visits Utf8 types`, () => validateFeatureVisitor(new Utf8())); test(`visits LargeUtf8 types`, () => validateFeatureVisitor(new LargeUtf8())); test(`visits Binary types`, () => validateFeatureVisitor(new Binary())); + test(`visits LargeBinary types`, () => validateFeatureVisitor(new LargeBinary())); test(`visits FixedSizeBinary types`, () => validateFeatureVisitor(new FixedSizeBinary(128))); test(`visits DateDay types`, () => validateFeatureVisitor(new DateDay())); test(`visits DateMillisecond types`, () => validateFeatureVisitor(new DateMillisecond())); From 81e47b20b241df100f3a24194e97a0423adc0d5e Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Mon, 18 Dec 2023 16:17:29 +0100 Subject: [PATCH 058/570] GH-39243:[R][CI] Remove r-arrow conda nightlies (#39244) ### Rationale for this change The jobs run for ~30h/day for basically no benefit. See issue for details. ### What changes are included in this PR? Removal of all r-arrow conda nightlies and auxiliary files. ### Are these changes tested? No. ### Are there any user-facing changes? No. 
* Closes: #39243 Authored-by: Jacob Wujciak-Jens Signed-off-by: Jacob Wujciak-Jens --- .../.ci_support/r/linux_64_r_base4.2.yaml | 27 ----- .../.ci_support/r/linux_64_r_base4.3.yaml | 27 ----- .../r/linux_aarch64_r_base4.2.yaml | 31 ------ .../r/linux_aarch64_r_base4.3.yaml | 31 ------ .../.ci_support/r/osx_64_r_base4.2.yaml | 27 ----- .../.ci_support/r/osx_64_r_base4.3.yaml | 27 ----- .../.ci_support/r/osx_arm64_r_base4.2.yaml | 27 ----- .../.ci_support/r/osx_arm64_r_base4.3.yaml | 27 ----- .../conda-recipes/.ci_support/r/win_64_.yaml | 12 -- dev/tasks/conda-recipes/r-arrow/bld.bat | 14 --- dev/tasks/conda-recipes/r-arrow/build.sh | 14 --- dev/tasks/conda-recipes/r-arrow/build_win.sh | 7 -- dev/tasks/conda-recipes/r-arrow/configure.win | 8 -- .../conda-recipes/r-arrow/install.libs.R | 5 - dev/tasks/conda-recipes/r-arrow/meta.yaml | 73 ------------ dev/tasks/tasks.yml | 104 +----------------- 16 files changed, 3 insertions(+), 458 deletions(-) delete mode 100644 dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.2.yaml delete mode 100644 dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.3.yaml delete mode 100644 dev/tasks/conda-recipes/.ci_support/r/linux_aarch64_r_base4.2.yaml delete mode 100644 dev/tasks/conda-recipes/.ci_support/r/linux_aarch64_r_base4.3.yaml delete mode 100644 dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.2.yaml delete mode 100644 dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.3.yaml delete mode 100644 dev/tasks/conda-recipes/.ci_support/r/osx_arm64_r_base4.2.yaml delete mode 100644 dev/tasks/conda-recipes/.ci_support/r/osx_arm64_r_base4.3.yaml delete mode 100644 dev/tasks/conda-recipes/.ci_support/r/win_64_.yaml delete mode 100644 dev/tasks/conda-recipes/r-arrow/bld.bat delete mode 100755 dev/tasks/conda-recipes/r-arrow/build.sh delete mode 100755 dev/tasks/conda-recipes/r-arrow/build_win.sh delete mode 100755 dev/tasks/conda-recipes/r-arrow/configure.win delete mode 100644 dev/tasks/conda-recipes/r-arrow/install.libs.R delete mode 100644 dev/tasks/conda-recipes/r-arrow/meta.yaml diff --git a/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.2.yaml b/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.2.yaml deleted file mode 100644 index 6e661e1357d22..0000000000000 --- a/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.2.yaml +++ /dev/null @@ -1,27 +0,0 @@ -c_compiler: -- gcc -c_compiler_version: -- '12' -cdt_name: -- cos6 -channel_sources: -- conda-forge -channel_targets: -- conda-forge main -cxx_compiler: -- gxx -cxx_compiler_version: -- '12' -docker_image: -- quay.io/condaforge/linux-anvil-cos7-x86_64 -pin_run_as_build: - r-base: - min_pin: x.x - max_pin: x.x -r_base: -- '4.2' -target_platform: -- linux-64 -zip_keys: -- - c_compiler_version - - cxx_compiler_version diff --git a/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.3.yaml b/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.3.yaml deleted file mode 100644 index a4d06c9f20cdd..0000000000000 --- a/dev/tasks/conda-recipes/.ci_support/r/linux_64_r_base4.3.yaml +++ /dev/null @@ -1,27 +0,0 @@ -c_compiler: -- gcc -c_compiler_version: -- '12' -cdt_name: -- cos6 -channel_sources: -- conda-forge -channel_targets: -- conda-forge main -cxx_compiler: -- gxx -cxx_compiler_version: -- '12' -docker_image: -- quay.io/condaforge/linux-anvil-cos7-x86_64 -pin_run_as_build: - r-base: - min_pin: x.x - max_pin: x.x -r_base: -- '4.3' -target_platform: -- linux-64 -zip_keys: -- - c_compiler_version - - cxx_compiler_version diff --git 
a/dev/tasks/conda-recipes/.ci_support/r/linux_aarch64_r_base4.2.yaml b/dev/tasks/conda-recipes/.ci_support/r/linux_aarch64_r_base4.2.yaml deleted file mode 100644 index 9dcd0c34c851c..0000000000000 --- a/dev/tasks/conda-recipes/.ci_support/r/linux_aarch64_r_base4.2.yaml +++ /dev/null @@ -1,31 +0,0 @@ -BUILD: -- aarch64-conda_cos7-linux-gnu -c_compiler: -- gcc -c_compiler_version: -- '12' -cdt_arch: -- aarch64 -cdt_name: -- cos7 -channel_sources: -- conda-forge -channel_targets: -- conda-forge main -cxx_compiler: -- gxx -cxx_compiler_version: -- '12' -docker_image: -- quay.io/condaforge/linux-anvil-cos7-x86_64 -pin_run_as_build: - r-base: - min_pin: x.x - max_pin: x.x -r_base: -- '4.2' -target_platform: -- linux-aarch64 -zip_keys: -- - c_compiler_version - - cxx_compiler_version diff --git a/dev/tasks/conda-recipes/.ci_support/r/linux_aarch64_r_base4.3.yaml b/dev/tasks/conda-recipes/.ci_support/r/linux_aarch64_r_base4.3.yaml deleted file mode 100644 index 028b190bb1ef5..0000000000000 --- a/dev/tasks/conda-recipes/.ci_support/r/linux_aarch64_r_base4.3.yaml +++ /dev/null @@ -1,31 +0,0 @@ -BUILD: -- aarch64-conda_cos7-linux-gnu -c_compiler: -- gcc -c_compiler_version: -- '12' -cdt_arch: -- aarch64 -cdt_name: -- cos7 -channel_sources: -- conda-forge -channel_targets: -- conda-forge main -cxx_compiler: -- gxx -cxx_compiler_version: -- '12' -docker_image: -- quay.io/condaforge/linux-anvil-cos7-x86_64 -pin_run_as_build: - r-base: - min_pin: x.x - max_pin: x.x -r_base: -- '4.3' -target_platform: -- linux-aarch64 -zip_keys: -- - c_compiler_version - - cxx_compiler_version diff --git a/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.2.yaml b/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.2.yaml deleted file mode 100644 index 2116eaf7b8b21..0000000000000 --- a/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.2.yaml +++ /dev/null @@ -1,27 +0,0 @@ -MACOSX_DEPLOYMENT_TARGET: -- '10.9' -c_compiler: -- clang -c_compiler_version: -- '15' -channel_sources: -- conda-forge -channel_targets: -- conda-forge main -cxx_compiler: -- clangxx -cxx_compiler_version: -- '15' -macos_machine: -- x86_64-apple-darwin13.4.0 -pin_run_as_build: - r-base: - min_pin: x.x - max_pin: x.x -r_base: -- '4.2' -target_platform: -- osx-64 -zip_keys: -- - c_compiler_version - - cxx_compiler_version diff --git a/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.3.yaml b/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.3.yaml deleted file mode 100644 index 7b8b62d8e00bb..0000000000000 --- a/dev/tasks/conda-recipes/.ci_support/r/osx_64_r_base4.3.yaml +++ /dev/null @@ -1,27 +0,0 @@ -MACOSX_DEPLOYMENT_TARGET: -- '10.9' -c_compiler: -- clang -c_compiler_version: -- '15' -channel_sources: -- conda-forge -channel_targets: -- conda-forge main -cxx_compiler: -- clangxx -cxx_compiler_version: -- '15' -macos_machine: -- x86_64-apple-darwin13.4.0 -pin_run_as_build: - r-base: - min_pin: x.x - max_pin: x.x -r_base: -- '4.3' -target_platform: -- osx-64 -zip_keys: -- - c_compiler_version - - cxx_compiler_version diff --git a/dev/tasks/conda-recipes/.ci_support/r/osx_arm64_r_base4.2.yaml b/dev/tasks/conda-recipes/.ci_support/r/osx_arm64_r_base4.2.yaml deleted file mode 100644 index af8a07c42208e..0000000000000 --- a/dev/tasks/conda-recipes/.ci_support/r/osx_arm64_r_base4.2.yaml +++ /dev/null @@ -1,27 +0,0 @@ -MACOSX_DEPLOYMENT_TARGET: -- '11.0' -c_compiler: -- clang -c_compiler_version: -- '15' -channel_sources: -- conda-forge -channel_targets: -- conda-forge main -cxx_compiler: -- clangxx -cxx_compiler_version: -- '15' 
-macos_machine: -- arm64-apple-darwin20.0.0 -pin_run_as_build: - r-base: - min_pin: x.x - max_pin: x.x -r_base: -- '4.2' -target_platform: -- osx-arm64 -zip_keys: -- - c_compiler_version - - cxx_compiler_version diff --git a/dev/tasks/conda-recipes/.ci_support/r/osx_arm64_r_base4.3.yaml b/dev/tasks/conda-recipes/.ci_support/r/osx_arm64_r_base4.3.yaml deleted file mode 100644 index a8e8aab83d598..0000000000000 --- a/dev/tasks/conda-recipes/.ci_support/r/osx_arm64_r_base4.3.yaml +++ /dev/null @@ -1,27 +0,0 @@ -MACOSX_DEPLOYMENT_TARGET: -- '11.0' -c_compiler: -- clang -c_compiler_version: -- '15' -channel_sources: -- conda-forge -channel_targets: -- conda-forge main -cxx_compiler: -- clangxx -cxx_compiler_version: -- '15' -macos_machine: -- arm64-apple-darwin20.0.0 -pin_run_as_build: - r-base: - min_pin: x.x - max_pin: x.x -r_base: -- '4.3' -target_platform: -- osx-arm64 -zip_keys: -- - c_compiler_version - - cxx_compiler_version diff --git a/dev/tasks/conda-recipes/.ci_support/r/win_64_.yaml b/dev/tasks/conda-recipes/.ci_support/r/win_64_.yaml deleted file mode 100644 index 72a5bf336c156..0000000000000 --- a/dev/tasks/conda-recipes/.ci_support/r/win_64_.yaml +++ /dev/null @@ -1,12 +0,0 @@ -channel_sources: -- conda-forge -channel_targets: -- conda-forge main -pin_run_as_build: - r-base: - min_pin: x.x - max_pin: x.x -r_base: -- '4.1' -target_platform: -- win-64 diff --git a/dev/tasks/conda-recipes/r-arrow/bld.bat b/dev/tasks/conda-recipes/r-arrow/bld.bat deleted file mode 100644 index 04d59f282d84b..0000000000000 --- a/dev/tasks/conda-recipes/r-arrow/bld.bat +++ /dev/null @@ -1,14 +0,0 @@ -@echo on - -bash %RECIPE_DIR%/build_win.sh -IF %ERRORLEVEL% NEQ 0 exit 1 - -cp %RECIPE_DIR%/configure.win r -IF %ERRORLEVEL% NEQ 0 exit 1 - -cp %RECIPE_DIR%/install.libs.R r/src -IF %ERRORLEVEL% NEQ 0 exit 1 - -set "MAKEFLAGS=-j%CPU_COUNT%" -"%R%" CMD INSTALL --build r -IF %ERRORLEVEL% NEQ 0 exit 1 diff --git a/dev/tasks/conda-recipes/r-arrow/build.sh b/dev/tasks/conda-recipes/r-arrow/build.sh deleted file mode 100755 index 9f5255cbaeee3..0000000000000 --- a/dev/tasks/conda-recipes/r-arrow/build.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -set -ex - -# arrow uses C++17 -export ARROW_R_CXXFLAGS="${ARROW_R_CXXFLAGS} -std=c++17" -export LIBARROW_BUILD=false - -if [[ "${target_platform}" == osx-* ]]; then - # See https://conda-forge.org/docs/maintainer/knowledge_base.html#newer-c-features-with-old-sdk - export ARROW_R_CXXFLAGS="${ARROW_R_CXXFLAGS} -D_LIBCPP_DISABLE_AVAILABILITY" -fi - -# ${R_ARGS} necessary to support cross-compilation -${R} CMD INSTALL --build r/. 
${R_ARGS} diff --git a/dev/tasks/conda-recipes/r-arrow/build_win.sh b/dev/tasks/conda-recipes/r-arrow/build_win.sh deleted file mode 100755 index 0c9a85ce3943a..0000000000000 --- a/dev/tasks/conda-recipes/r-arrow/build_win.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -set -exuo pipefail - -# Rename arrow.dll to lib_arrow.dll to avoid conflicts with the arrow-cpp arrow.dll -sed -i -e 's/void R_init_arrow/__declspec(dllexport) void R_init_lib_arrow/g' r/src/arrowExports.cpp -sed -i -e 's/useDynLib(arrow/useDynLib(lib_arrow/g' r/NAMESPACE diff --git a/dev/tasks/conda-recipes/r-arrow/configure.win b/dev/tasks/conda-recipes/r-arrow/configure.win deleted file mode 100755 index 0fc96576bde74..0000000000000 --- a/dev/tasks/conda-recipes/r-arrow/configure.win +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -set -euxo pipefail - -echo "PKG_CPPFLAGS=-DNDEBUG -I\"${LIBRARY_PREFIX}/include\" -I\"${PREFIX}/include\" -DARROW_R_WITH_ACERO -DARROW_R_WITH_PARQUET -DARROW_R_WITH_DATASET -DARROW_R_WITH_S3 -DARROW_R_WITH_JSON" > src/Makevars.win -echo "PKG_CXXFLAGS=\$(CXX_VISIBILITY)" >> src/Makevars.win -echo 'CXX_STD=CXX17' >> src/Makevars.win -echo "PKG_LIBS=-L\"${LIBRARY_PREFIX}/lib\" -larrow_dataset -larrow_acero -lparquet -larrow" >> src/Makevars.win diff --git a/dev/tasks/conda-recipes/r-arrow/install.libs.R b/dev/tasks/conda-recipes/r-arrow/install.libs.R deleted file mode 100644 index 005bbe16b9984..0000000000000 --- a/dev/tasks/conda-recipes/r-arrow/install.libs.R +++ /dev/null @@ -1,5 +0,0 @@ -src_dir <- file.path(R_PACKAGE_SOURCE, "src", fsep = "/") -dest_dir <- file.path(R_PACKAGE_DIR, paste0("libs", R_ARCH), fsep="/") - -dir.create(file.path(R_PACKAGE_DIR, paste0("libs", R_ARCH), fsep="/"), recursive = TRUE, showWarnings = FALSE) -file.copy(file.path(src_dir, "arrow.dll", fsep = "/"), file.path(dest_dir, "lib_arrow.dll", fsep = "/")) diff --git a/dev/tasks/conda-recipes/r-arrow/meta.yaml b/dev/tasks/conda-recipes/r-arrow/meta.yaml deleted file mode 100644 index e8b834254f41c..0000000000000 --- a/dev/tasks/conda-recipes/r-arrow/meta.yaml +++ /dev/null @@ -1,73 +0,0 @@ -{% set version = ARROW_VERSION %} -{% set posix = 'm2-' if win else '' %} -{% set native = 'm2w64-' if win else '' %} - -package: - name: r-arrow - version: {{ version|replace("-", "_") }} - -source: - path: ../../../../ - -build: - merge_build_host: true # [win] - number: 0 - rpaths: - - lib/R/lib/ - - lib/ - -requirements: - build: - - cross-r-base {{ r_base }} # [build_platform != target_platform] - - r-r6 # [build_platform != target_platform] - - r-assertthat # [build_platform != target_platform] - - r-bit64 # [build_platform != target_platform] - - r-purrr # [build_platform != target_platform] - - r-rlang # [build_platform != target_platform] - - r-tidyselect # [build_platform != target_platform] - - {{ compiler('c') }} # [not win] - - {{ compiler('cxx') }} # [not win] - - {{ compiler('r_clang') }} # [win] - - pkg-config - - {{ posix }}make - - {{ posix }}sed # [win] - - {{ posix }}coreutils # [win] - - {{ posix }}filesystem # [win] - - {{ posix }}zip # [win] - host: - # Needs to be here, otherwise merge_build_host runs into issues - - pkg-config # [win] - - libarrow {{ version }} - - r-base - - r-r6 - - r-cpp11 - - r-assertthat - - r-bit64 - - r-purrr - - r-rlang - - r-tidyselect - run: - - r-base - - r-r6 - - r-assertthat - - r-bit64 - - r-purrr - - r-rlang - - r-tidyselect - -test: - commands: - - $R -e "library('arrow'); stopifnot(arrow_with_acero(), arrow_with_dataset(), arrow_with_parquet(), arrow_with_s3())" # 
[not win] - - "\"%R%\" -e \"library('arrow'); stopifnot(arrow_with_acero(), arrow_with_dataset(), arrow_with_parquet(), arrow_with_s3())\"" # [win] - -about: - home: https://github.com/apache/arrow - license: Apache-2.0 - license_file: LICENSE.txt - summary: R Integration to 'Apache' 'Arrow'. - license_family: APACHE - -extra: - recipe-maintainers: - - conda-forge/r - - conda-forge/arrow-cpp diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 84c3cad6ac899..ed6ea08894f10 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -85,12 +85,6 @@ groups: r: - test*-r-* - # r-conda tasks - - conda-linux-x64-cpu-r* - - conda-linux-aarch64-cpu-r* - - conda-osx-x64-cpu-r* - - conda-osx-arm64-cpu-r* - - conda-win-x64-cpu-r* - r-binary-packages ruby: @@ -237,32 +231,12 @@ tasks: # # * On conda-forge the `pyarrow` and `arrow-cpp` packages are built in # the same feedstock as the dependency matrix is the same for them as - # Python and the OS are the main dimension. The R package `r-arrow` is - # an independent feedstock as it doesn't have the Python but the - # R dimension. + # Python and the OS are the main dimension. # * The files in `dev/tasks/conda-recipes/.ci_support/` are automatically # generated and to be synced regularly from the feedstock. We have no way # yet to generate them inside the arrow repository automatically. - - conda-linux-x64-cpu-r43: - ci: azure - template: conda-recipes/azure.linux.yml - params: - config: linux_64_cuda_compiler_versionNone - r_config: linux_64_r_base4.3 - artifacts: - - libarrow-{no_rc_version}-(h[a-z0-9]+)_0_cpu.conda - - r-arrow-{no_rc_version}-r43(h[a-z0-9]+)_0.conda - - conda-linux-x64-cpu-r42: - ci: azure - template: conda-recipes/azure.linux.yml - params: - config: linux_64_cuda_compiler_versionNone - r_config: linux_64_r_base4.2 - artifacts: - - libarrow-{no_rc_version}-(h[a-z0-9]+)_0_cpu.conda - - r-arrow-{no_rc_version}-r42(h[a-z0-9]+)_0.conda + # * We no longer run the arrow-r jobs as the feedstock is very stable and + # the complexity is mostly covered by arrow-cpp. 
conda-linux-x64-cpu-py3: ci: azure @@ -290,26 +264,6 @@ tasks: ########################### Conda Linux (aarch64) ########################### - conda-linux-aarch64-cpu-r43: - ci: azure - template: conda-recipes/azure.linux.yml - params: - config: linux_aarch64_cuda_compiler_versionNone - r_config: linux_aarch64_r_base4.3 - artifacts: - - libarrow-{no_rc_version}-(h[a-z0-9]+)_0_cpu.conda - - r-arrow-{no_rc_version}-r43(h[a-z0-9]+)_0.conda - - conda-linux-aarch64-cpu-r42: - ci: azure - template: conda-recipes/azure.linux.yml - params: - config: linux_aarch64_cuda_compiler_versionNone - r_config: linux_aarch64_r_base4.2 - artifacts: - - libarrow-{no_rc_version}-(h[a-z0-9]+)_0_cpu.conda - - r-arrow-{no_rc_version}-r42(h[a-z0-9]+)_0.conda - conda-linux-aarch64-cpu-py3: ci: azure template: conda-recipes/azure.linux.yml @@ -362,26 +316,6 @@ tasks: ############################## Conda OSX (x64) ############################## - conda-osx-x64-cpu-r43: - ci: azure - template: conda-recipes/azure.osx.yml - params: - config: osx_64_ - r_config: osx_64_r_base4.3 - artifacts: - - libarrow-{no_rc_version}-(h[a-z0-9]+)_0_cpu.conda - - r-arrow-{no_rc_version}-r43(h[a-z0-9]+)_0.conda - - conda-osx-x64-cpu-r42: - ci: azure - template: conda-recipes/azure.osx.yml - params: - config: osx_64_ - r_config: osx_64_r_base4.2 - artifacts: - - libarrow-{no_rc_version}-(h[a-z0-9]+)_0_cpu.conda - - r-arrow-{no_rc_version}-r42(h[a-z0-9]+)_0.conda - conda-osx-x64-cpu-py3: ci: azure template: conda-recipes/azure.osx.yml @@ -396,26 +330,6 @@ tasks: ############################# Conda OSX (arm64) ############################# - conda-osx-arm64-cpu-r43: - ci: azure - template: conda-recipes/azure.osx.yml - params: - config: osx_arm64_ - r_config: osx_arm64_r_base4.3 - artifacts: - - libarrow-{no_rc_version}-(h[a-z0-9]+)_0_cpu.conda - - r-arrow-{no_rc_version}-r43(h[a-z0-9]+)_0.conda - - conda-osx-arm64-cpu-r42: - ci: azure - template: conda-recipes/azure.osx.yml - params: - config: osx_arm64_ - r_config: osx_arm64_r_base4.2 - artifacts: - - libarrow-{no_rc_version}-(h[a-z0-9]+)_0_cpu.conda - - r-arrow-{no_rc_version}-r42(h[a-z0-9]+)_0.conda - conda-osx-arm64-cpu-py3: ci: azure template: conda-recipes/azure.osx.yml @@ -430,18 +344,6 @@ tasks: ############################## Conda Windows ################################ - conda-win-x64-cpu-r41: - ci: azure - template: conda-recipes/azure.win.yml - params: - config: win_64_cuda_compiler_versionNone - r_config: win_64_ - artifacts: - - libarrow-{no_rc_version}-(h[a-z0-9]+)_0_cpu.conda - - r-arrow-{no_rc_version}-r41(h[a-z0-9]+)_0.conda - - # conda-forge does not yet support R 4.2 on windows - conda-win-x64-cpu-py3: ci: azure template: conda-recipes/azure.win.yml From 372f0a063f4b174a1ffab5f1cd037094d8da7a8a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 Dec 2023 05:44:43 +0900 Subject: [PATCH 059/570] MINOR: [Java] Bump org.apache.maven.surefire:surefire-junit-platform from 3.2.2 to 3.2.3 in /java (#39280) Bumps org.apache.maven.surefire:surefire-junit-platform from 3.2.2 to 3.2.3. 
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.maven.surefire:surefire-junit-platform&package-manager=maven&previous-version=3.2.2&new-version=3.2.3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
    Dependabot commands and options

    You can trigger Dependabot actions by commenting on this PR:
    - `@ dependabot rebase` will rebase this PR
    - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
    - `@ dependabot merge` will merge this PR after your CI passes on it
    - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
    - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
    - `@ dependabot reopen` will reopen this PR if it is closed
    - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
    - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
    - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
    - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
    - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
    Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index 86eb428ebd571..ccd8418851b02 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -452,7 +452,7 @@ org.apache.maven.surefire surefire-junit-platform - 3.2.2 + 3.2.3 From 339810bae2e5f22bc6e1d81ab5a21e038e5994fa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 Dec 2023 05:46:15 +0900 Subject: [PATCH 060/570] MINOR: [C#] Bump Grpc.Tools from 2.59.0 to 2.60.0 in /csharp (#39283) Bumps [Grpc.Tools](https://github.com/grpc/grpc) from 2.59.0 to 2.60.0.
    [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Grpc.Tools&package-manager=nuget&previous-version=2.59.0&new-version=2.60.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
    Dependabot commands and options

    You can trigger Dependabot actions by commenting on this PR:
    - `@ dependabot rebase` will rebase this PR
    - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
    - `@ dependabot merge` will merge this PR after your CI passes on it
    - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
    - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
    - `@ dependabot reopen` will reopen this PR if it is closed
    - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
    - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
    - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
    - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
    - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
    Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- .../src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj | 2 +- csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj index f04acf3a4c7e2..47b9db2acb155 100644 --- a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj +++ b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj @@ -5,7 +5,7 @@ - + diff --git a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj index 1849bf11b7439..aae26273ac282 100644 --- a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj +++ b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj @@ -7,7 +7,7 @@ - + From 3943f744722031d2706d8ed99c1b06caf5429976 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 Dec 2023 06:12:51 +0900 Subject: [PATCH 061/570] MINOR: [Java] Bump org.apache.hadoop:hadoop-client-runtime from 3.3.2 to 3.3.6 in /java (#39278) Bumps org.apache.hadoop:hadoop-client-runtime from 3.3.2 to 3.3.6. [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.hadoop:hadoop-client-runtime&package-manager=maven&previous-version=3.3.2&new-version=3.3.6)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
    Dependabot commands and options

    You can trigger Dependabot actions by commenting on this PR:
    - `@ dependabot rebase` will rebase this PR
    - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
    - `@ dependabot merge` will merge this PR after your CI passes on it
    - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
    - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
    - `@ dependabot reopen` will reopen this PR if it is closed
    - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
    - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
    - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
    - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
    - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
    Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/adapter/orc/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index 72ba13ea81738..803ae5a33826f 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -54,7 +54,7 @@ org.apache.hadoop hadoop-client-runtime - 3.3.2 + 3.3.6 test From 01c461fa8fe668e5750a5e6b96eeb9ef4aedc858 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 Dec 2023 08:26:39 +0900 Subject: [PATCH 062/570] MINOR: [Java] Bump io.grpc:grpc-bom from 1.59.0 to 1.60.0 in /java (#39282) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [io.grpc:grpc-bom](https://github.com/grpc/grpc-java) from 1.59.0 to 1.60.0.
    Release notes

    Sourced from io.grpc:grpc-bom's releases.

    v1.60.0

    API Changes

    • api: Stabilize ForwardingServerBuilder, ForwardingChannelBuilder2, and ForwardingChannelBuilder. Note that ForwardingChannelBuilder is stabilized (no changes will be made to it), but immediately deprecated in favor of ForwardingChannelBuilder2. (#10586)
    • api: Deprecate ForwardingChannelBuilder.delegate(). De facto this deprecates the class itself, since all classes extending ForwardingChannelBuilder implement the delegate() method. See javadoc for details (#10587)
    • api: Changed recently-introduced LoadBalancer.acceptResolvedAddresses() to return Status instead of boolean (#10636). This is part of continued work to align the LB API cross-language and API stabilization
    • stub: Deprecate StreamObservers (#10654)
    • alts: AltsChannelBuilder now extends ForwardingChannelBuilder2 (#10587)
    • protobuf: Stabilize ProtoUtils.metadataMarshaller() (#10628)
    • protobuf-lite: ProtoLiteUtils experimental comment (#10627)

    Behavior Changes

    • core: ManagedChannels now check the address types provided by the nameResolver (for the given target) with the address types supported by the channel transport and generate an error in case of mismatch. That dramatically improves the error message when an issue occurs
• core: When a server stream is closed due to the user's code (an uncaught exception in the halfClosed, messagesAvailable, or onReady callback of a ServerStream's listener), the Status.UNKNOWN returned to the client will have an "Application error processing RPC" description. Previously the description was empty. This is helpful to differentiate between server errors originating in the user application, in the gRPC library, or even those injected by a proxy. (#10643)
    • xds: Log ORCA UNIMPLEMENTED error to subchannel logger. This removes them from the normal application logs, reducing log spam

    Improvements

    • Change the underlying implementations of RingHash, RoundRobin, WeightedRoundRobin and LeastRequest load balancers to utilize the pick first load balancer rather than directly manage subchannels. This should only be noticeable if it introduced a bug
    • core: Avoid flushing headers when the server returns a single response (#9314). This is a performance optimization to reduce the number of packets for non-streaming responses
    • util: Make grpc-core an implementation dependency. This will prevent the io.grpc.internal classes in grpc-core from being visible during compilation when depending on just grpc-util
    • netty: Implement Http2Headers.isEmpty(). This fixes compatibility with Netty 4.1.101.Final.
    • netty: Add NettyServerBuilder.maxRstFramesPerWindow(). This can be used to limit impact of Rapid Reset
    • netty: Disable huffman coding in headers (#10563). Huffman coding provides modest compression for relatively high CPU usage, especially within a data center. Rely just on the HPACK static and dynamic tables for compression, for higher performance. This only impacts header values 512 bytes or longer, as Netty already disabled Huffman for smaller values
    • alts: Improve handshake failure error message by propagating original exception (#10644)

    Bug Fixes

    • util: Remove shutdown subchannels from OD tracking (#10683). This could have caused a memory leak on a long-lived channel. But we don’t think it could be triggered with our built-in load balancing policies.

    Dependencies

    • Bump Netty to 4.1.100.Final

    Acknowledgements

@anthonyjpratti @fedorka @jpd236 @mateusazis @pkoenig10 @yannickepstein @amirhadadi

    v1.59.1

    • netty: Implement Http2Headers.isEmpty(). This fixes compatibility with Netty 4.1.101.Final.
    • netty: Add NettyServerBuilder.maxRstFramesPerWindow(). This can be used to limit impact of Rapid Reset
    • xds: Log ORCA UNIMPLEMENTED error to subchannel logger. This removes them from the normal application logs, reducing log spam
    Commits
    • eb8b1d8 Bump version to 1.60.0
    • 5b1bb8c Update README etc to reference 1.60.0
    • 9400613 all: Add grpc-inprocess
    • 69114bf inprocess: Add missing anonymous address as supported
    • 24b3ca1 core: Detect NameResolverProviders passed as Factories
    • 6c55cd0 util: Remove shutdown subchannels from OD tracking (#10683)
    • 43e98d0 netty: Add option to limit RST_STREAM rate
    • 2b65e66 netty: disable huffman coding in headers (#10563)
    • 90e76a1 Implement Http2Headers.isEmpty (#10663)
    • 0299788 util: Make grpc-core an implementation dependency
    • Additional commits viewable in compare view

    [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=io.grpc:grpc-bom&package-manager=maven&previous-version=1.59.0&new-version=1.60.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
    Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index ccd8418851b02..f6dcfadb81b1e 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -34,7 +34,7 @@ 2.0.9 32.1.3-jre 4.1.100.Final - 1.59.0 + 1.60.0 3.23.1 2.16.0 2.7.1 From 0552217efa4ba0a1a1a7857a86c92278ecf129c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B0=A2=E5=A4=A9?= Date: Tue, 19 Dec 2023 07:39:02 +0800 Subject: [PATCH 063/570] GH-37199: [C++] Expose a span converter for Buffer and ArraySpan (#38027) ### Rationale for this change Convenience. We can have such a helper at the buffer and array data level. ### What changes are included in this PR? Add `Buffer::span_as`, `Buffer::mutuable_span_as` and `ArraySpan::GetSpan`. ### Are these changes tested? No, but I'm happy to add some test if needed. ### Are there any user-facing changes? Yes, new public functions. * Closes: #37199 Authored-by: jsjtxietian Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/array/data.h | 31 +++++++++++++++++++++++++++++++ cpp/src/arrow/buffer.h | 13 +++++++++++++ 2 files changed, 44 insertions(+) diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index 4c2df8381490a..f29f164d19973 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -18,6 +18,7 @@ #pragma once #include // IWYU pragma: export +#include #include #include #include @@ -438,6 +439,36 @@ struct ARROW_EXPORT ArraySpan { return GetValues(i, this->offset); } + /// \brief Access a buffer's data as a span + /// + /// \param i The buffer index + /// \param length The required length (in number of typed values) of the requested span + /// \pre i > 0 + /// \pre length <= the length of the buffer (in number of values) that's expected for + /// this array type + /// \return A span of the requested length + template + util::span GetSpan(int i, int64_t length) const { + const int64_t buffer_length = buffers[i].size / static_cast(sizeof(T)); + assert(i > 0 && length + offset <= buffer_length); + return util::span(buffers[i].data_as() + this->offset, length); + } + + /// \brief Access a buffer's data as a span + /// + /// \param i The buffer index + /// \param length The required length (in number of typed values) of the requested span + /// \pre i > 0 + /// \pre length <= the length of the buffer (in number of values) that's expected for + /// this array type + /// \return A span of the requested length + template + util::span GetSpan(int i, int64_t length) { + const int64_t buffer_length = buffers[i].size / static_cast(sizeof(T)); + assert(i > 0 && length + offset <= buffer_length); + return util::span(buffers[i].mutable_data_as() + this->offset, length); + } + inline bool IsNull(int64_t i) const { return !IsValid(i); } inline bool IsValid(int64_t i) const { diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h index ae76550be26fc..52fd94ec1f7d4 100644 --- a/cpp/src/arrow/buffer.h +++ b/cpp/src/arrow/buffer.h @@ -30,6 +30,7 @@ #include "arrow/status.h" #include "arrow/type_fwd.h" #include "arrow/util/macros.h" +#include "arrow/util/span.h" #include "arrow/util/visibility.h" namespace arrow { @@ -233,6 +234,12 @@ class ARROW_EXPORT Buffer { return reinterpret_cast(data()); } + /// \brief Return the buffer's data as a span + template + util::span span_as() const { + return util::span(data_as(), static_cast(size() / sizeof(T))); + } + /// \brief Return a writable pointer to the 
buffer's data /// /// The buffer has to be a mutable CPU buffer (`is_cpu()` and `is_mutable()` @@ -260,6 +267,12 @@ class ARROW_EXPORT Buffer { return reinterpret_cast(mutable_data()); } + /// \brief Return the buffer's mutable data as a span + template + util::span mutable_span_as() const { + return util::span(mutable_data_as(), static_cast(size() / sizeof(T))); + } + /// \brief Return the device address of the buffer's data uintptr_t address() const { return reinterpret_cast(data_); } From 659b2311bc577eb4322c5772a5e90f919620bb95 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Tue, 19 Dec 2023 07:32:00 +0000 Subject: [PATCH 064/570] GH-39262: [C++][Azure][FS] Add default credential auth configuration (#39263) ### Rationale for this change Default credential is a useful auth option. ### What changes are included in this PR? Implement `AzureOptions::ConfigureDefaultCredential` plus a little bit of plumbing to go around it. Created a simple test. ### Are these changes tested? Added a simple unittest that everything initialises happily. This does not actually test a successful authentication. I think to do a real authentication with Azure we would need to run the test against real blob storage and we would need to create various identities which are non-trivial to create. Personally I think this is ok because all the complexity is abstracted away by the Azure SDK. ### Are there any user-facing changes? * Closes: #39262 Lead-authored-by: Thomas Newton Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- cpp/src/arrow/filesystem/azurefs.cc | 24 ++++++++++++++++++++++-- cpp/src/arrow/filesystem/azurefs.h | 7 +++++++ cpp/src/arrow/filesystem/azurefs_test.cc | 18 ++++++------------ 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 217885364089b..dd267aac36d35 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -18,6 +18,7 @@ #include "arrow/filesystem/azurefs.h" #include "arrow/filesystem/azurefs_internal.h" +#include #include #include @@ -61,6 +62,8 @@ bool AzureOptions::Equals(const AzureOptions& other) const { switch (credential_kind_) { case CredentialKind::kAnonymous: return true; + case CredentialKind::kTokenCredential: + return token_credential_ == other.token_credential_; case CredentialKind::kStorageSharedKeyCredential: return storage_shared_key_credential_->AccountName == other.storage_shared_key_credential_->AccountName; @@ -69,8 +72,7 @@ bool AzureOptions::Equals(const AzureOptions& other) const { return false; } -Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name, - const std::string& account_key) { +void AzureOptions::SetUrlsForAccountName(const std::string& account_name) { if (this->backend == AzureBackend::kAzurite) { account_blob_url_ = "http://127.0.0.1:10000/" + account_name + "/"; account_dfs_url_ = "http://127.0.0.1:10000/" + account_name + "/"; @@ -78,6 +80,18 @@ Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_na account_dfs_url_ = "https://" + account_name + ".dfs.core.windows.net/"; account_blob_url_ = "https://" + account_name + ".blob.core.windows.net/"; } +} + +Status AzureOptions::ConfigureDefaultCredential(const std::string& account_name) { + AzureOptions::SetUrlsForAccountName(account_name); + credential_kind_ = CredentialKind::kTokenCredential; + token_credential_ = std::make_shared(); + return Status::OK(); +} + +Status 
AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name, + const std::string& account_key) { + AzureOptions::SetUrlsForAccountName(account_name); credential_kind_ = CredentialKind::kStorageSharedKeyCredential; storage_shared_key_credential_ = std::make_shared(account_name, account_key); @@ -89,6 +103,9 @@ Result> AzureOptions::MakeBlobServiceC switch (credential_kind_) { case CredentialKind::kAnonymous: break; + case CredentialKind::kTokenCredential: + return std::make_unique(account_blob_url_, + token_credential_); case CredentialKind::kStorageSharedKeyCredential: return std::make_unique(account_blob_url_, storage_shared_key_credential_); @@ -101,6 +118,9 @@ AzureOptions::MakeDataLakeServiceClient() const { switch (credential_kind_) { case CredentialKind::kAnonymous: break; + case CredentialKind::kTokenCredential: + return std::make_unique(account_dfs_url_, + token_credential_); case CredentialKind::kStorageSharedKeyCredential: return std::make_unique( account_dfs_url_, storage_shared_key_credential_); diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 1266aa2d02b86..b2c7010ff3758 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -70,16 +70,23 @@ struct ARROW_EXPORT AzureOptions { enum class CredentialKind { kAnonymous, + kTokenCredential, kStorageSharedKeyCredential, } credential_kind_ = CredentialKind::kAnonymous; std::shared_ptr storage_shared_key_credential_; + std::shared_ptr token_credential_; + + void SetUrlsForAccountName(const std::string& account_name); + public: AzureOptions(); ~AzureOptions(); + Status ConfigureDefaultCredential(const std::string& account_name); + Status ConfigureAccountKeyCredential(const std::string& account_name, const std::string& account_key); diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 463ff4e8daf3d..799f3992a2210 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -43,9 +43,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -266,15 +263,12 @@ class AzureHierarchicalNSEnv : public AzureEnvImpl { bool WithHierarchicalNamespace() const final { return true; } }; -// Placeholder tests -// TODO: GH-18014 Remove once a proper test is added -TEST(AzureFileSystem, InitializeCredentials) { - auto default_credential = std::make_shared(); - auto managed_identity_credential = - std::make_shared(); - auto service_principal_credential = - std::make_shared("tenant_id", "client_id", - "client_secret"); +TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) { + AzureOptions options; + options.backend = AzureBackend::kAzurite; // Irrelevant for this test because it + // doesn't connect to the server. + ARROW_EXPECT_OK(options.ConfigureDefaultCredential("dummy-account-name")); + EXPECT_OK_AND_ASSIGN(auto default_credential_fs, AzureFileSystem::Make(options)); } TEST(AzureFileSystem, OptionsCompare) { From f5dd3d4a1c0efb7c8587287da0c536988bcd1559 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Tue, 19 Dec 2023 09:45:00 +0100 Subject: [PATCH 065/570] GH-38535: [Python] Fix S3FileSystem equals None segfault (#39276) ### Rationale for this change `S3FileSystem` equals `None` currently causes bus error. ### What changes are included in this PR? Add `not None` to `FileSystem.equals` signature. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. 
* Closes: #38535 Authored-by: AlenkaF Signed-off-by: Joris Van den Bossche --- python/pyarrow/_fs.pyx | 2 +- python/pyarrow/tests/test_fs.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/_fs.pyx b/python/pyarrow/_fs.pyx index ef8db31bfc2f6..395f488144331 100644 --- a/python/pyarrow/_fs.pyx +++ b/python/pyarrow/_fs.pyx @@ -505,7 +505,7 @@ cdef class FileSystem(_Weakrefable): cdef inline shared_ptr[CFileSystem] unwrap(self) nogil: return self.wrapped - def equals(self, FileSystem other): + def equals(self, FileSystem other not None): """ Parameters ---------- diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py index 59c9c449429b3..d0fa253e314e9 100644 --- a/python/pyarrow/tests/test_fs.py +++ b/python/pyarrow/tests/test_fs.py @@ -542,6 +542,13 @@ def test_filesystem_equals(): assert SubTreeFileSystem('/base', fs0) != SubTreeFileSystem('/other', fs0) +def test_filesystem_equals_none(fs): + with pytest.raises(TypeError, match="got NoneType"): + fs.equals(None) + + assert fs is not None + + def test_subtree_filesystem(): localfs = LocalFileSystem() From 9cb78addf7fcd662de1579db9dff55bd1a420fe4 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Tue, 19 Dec 2023 09:45:41 +0100 Subject: [PATCH 066/570] GH-38683: [Python][Docs] Update docstrings for Time32Type and Time64Type (#39059) ### Rationale for this change `Time32Type` and `Time64Type` unit docs are not correctly documented. ### What changes are included in this PR? Update the docstrings for `Time32Type` and `Time64Type` `unit`. * Closes: #38683 Authored-by: AlenkaF Signed-off-by: Joris Van den Bossche --- python/pyarrow/types.pxi | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index a0ddf09d69423..912ee39f7d712 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -1108,6 +1108,9 @@ cdef class Time32Type(DataType): """ Concrete class for time32 data types. + Supported time unit resolutions are 's' [second] + and 'ms' [millisecond]. + Examples -------- Create an instance of time32 type: @@ -1124,7 +1127,7 @@ cdef class Time32Type(DataType): @property def unit(self): """ - The time unit ('s', 'ms', 'us' or 'ns'). + The time unit ('s' or 'ms'). Examples -------- @@ -1140,6 +1143,9 @@ cdef class Time64Type(DataType): """ Concrete class for time64 data types. + Supported time unit resolutions are 'us' [microsecond] + and 'ns' [nanosecond]. + Examples -------- Create an instance of time64 type: @@ -1156,7 +1162,7 @@ cdef class Time64Type(DataType): @property def unit(self): """ - The time unit ('s', 'ms', 'us' or 'ns'). + The time unit ('us' or 'ns'). Examples -------- From 64fed4e047f6a7b6e1081921135afc86fdcef1e7 Mon Sep 17 00:00:00 2001 From: Abram Fleishman Date: Tue, 19 Dec 2023 01:47:47 -0800 Subject: [PATCH 067/570] GH-39191: [R] throw error when `string_replace` is passed vector of values in `pattern` (#39219) ### Rationale for this change See #39191 This PR will hopefully throw an informative error message to let the user know that while the stringr::str_replace_all function can handle a named vector of values as the pattern argument, the arrow R package implementation cannot. ### What changes are included in this PR? - [ ] add tests for passing vector to the pattern argument - [ ] add check for length > 1 to the string replace bindings ### Are these changes tested? yes (though I need help!) ### Are there any user-facing changes? yes. 
Hopefully the user will be alerted by an informative error message that they cannot pass a vector to the pattern argument. No breaking changes are expected. * Closes: #39191 Authored-by: Abram B. Fleishman Signed-off-by: Nic Crane --- r/R/dplyr-funcs-string.R | 7 ++++++- r/tests/testthat/test-dplyr-funcs-string.R | 17 +++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/r/R/dplyr-funcs-string.R b/r/R/dplyr-funcs-string.R index 9f3220e557f08..a21ce78edd189 100644 --- a/r/R/dplyr-funcs-string.R +++ b/r/R/dplyr-funcs-string.R @@ -58,7 +58,6 @@ get_stringr_pattern_options <- function(pattern) { } ensure_opts <- function(opts) { - # default options for the simple cases if (is.character(opts)) { opts <- list(pattern = opts, fixed = FALSE, ignore_case = FALSE) @@ -352,6 +351,12 @@ register_bindings_string_regex <- function() { # Encapsulate some common logic for sub/gsub/str_replace/str_replace_all arrow_r_string_replace_function <- function(max_replacements) { function(pattern, replacement, x, ignore.case = FALSE, fixed = FALSE) { + if (length(pattern) != 1) { + stop("`pattern` must be a length 1 character vector") + } + if (length(replacement) != 1) { + stop("`replacement` must be a length 1 character vector") + } Expression$create( ifelse(fixed && !ignore.case, "replace_substring", "replace_substring_regex"), x, diff --git a/r/tests/testthat/test-dplyr-funcs-string.R b/r/tests/testthat/test-dplyr-funcs-string.R index 411b5ae3c738a..039220b88ee00 100644 --- a/r/tests/testthat/test-dplyr-funcs-string.R +++ b/r/tests/testthat/test-dplyr-funcs-string.R @@ -425,6 +425,23 @@ test_that("sub and gsub with namespacing", { }) test_that("str_replace and str_replace_all", { + x <- Expression$field_ref("x") + + expect_error( + call_binding("str_replace_all", x, c("F" = "_", "b" = "")), + regexp = "`pattern` must be a length 1 character vector" + ) + + expect_error( + call_binding("str_replace_all", x, c("F", "b"), c("_", "")), + regexp = "`pattern` must be a length 1 character vector" + ) + + expect_error( + call_binding("str_replace_all", x, c("F"), c("_", "")), + regexp = "`replacement` must be a length 1 character vector" + ) + df <- tibble(x = c("Foo", "bar")) compare_dplyr_binding( From 419bbc4ff6a5a14af18e5d7ca3ca2de41a413bd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 19 Dec 2023 12:08:38 +0100 Subject: [PATCH 068/570] MINOR: [Release] Update versions for 15.0.0-SNAPSHOT --- ci/scripts/PKGBUILD | 2 +- r/DESCRIPTION | 2 +- r/NEWS.md | 4 +++- r/pkgdown/assets/versions.json | 4 ++-- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD index 2cdd1d42634bf..674acc99f54a9 100644 --- a/ci/scripts/PKGBUILD +++ b/ci/scripts/PKGBUILD @@ -18,7 +18,7 @@ _realname=arrow pkgbase=mingw-w64-${_realname} pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" -pkgver=14.0.1.9000 +pkgver=14.0.2.9000 pkgrel=8000 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" arch=("any") diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 1bf25e57a3cce..b290a75f932d5 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -1,6 +1,6 @@ Package: arrow Title: Integration to 'Apache' 'Arrow' -Version: 14.0.1.9000 +Version: 14.0.2.9000 Authors@R: c( person("Neal", "Richardson", email = "neal.p.richardson@gmail.com", role = c("aut")), person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")), diff --git a/r/NEWS.md b/r/NEWS.md index 8515facdff871..63f12607d8d1b 100644 --- a/r/NEWS.md +++ 
b/r/NEWS.md @@ -17,7 +17,9 @@ under the License. --> -# arrow 14.0.1.9000 +# arrow 14.0.2.9000 + +# arrow 14.0.2 # arrow 14.0.0.2 diff --git a/r/pkgdown/assets/versions.json b/r/pkgdown/assets/versions.json index 88289e72004b3..35a1ef3b5ecb3 100644 --- a/r/pkgdown/assets/versions.json +++ b/r/pkgdown/assets/versions.json @@ -1,10 +1,10 @@ [ { - "name": "14.0.1.9000 (dev)", + "name": "14.0.2.9000 (dev)", "version": "dev/" }, { - "name": "14.0.1 (release)", + "name": "14.0.2 (release)", "version": "" }, { From 0479f8532d5ace54cf554e2e60aa621f06536091 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 19 Dec 2023 12:08:38 +0100 Subject: [PATCH 069/570] MINOR: [Release] Update .deb/.rpm changelogs for 14.0.2 --- .../linux-packages/apache-arrow-apt-source/debian/changelog | 6 ++++++ .../apache-arrow-release/yum/apache-arrow-release.spec.in | 3 +++ dev/tasks/linux-packages/apache-arrow/debian/changelog | 6 ++++++ dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in | 3 +++ 4 files changed, 18 insertions(+) diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog index 83a388c93051d..32a5a38afebf3 100644 --- a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow-apt-source (14.0.2-1) unstable; urgency=low + + * New upstream release. + + -- Raúl Cumplido Tue, 12 Dec 2023 09:31:43 -0000 + apache-arrow-apt-source (14.0.1-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in index 245e8afeaeb1d..348f8064ecc5f 100644 --- a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in +++ b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in @@ -102,6 +102,9 @@ else fi %changelog +* Tue Dec 12 2023 Raúl Cumplido - 14.0.2-1 +- New upstream release. + * Mon Nov 06 2023 Raúl Cumplido - 14.0.1-1 - New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/debian/changelog b/dev/tasks/linux-packages/apache-arrow/debian/changelog index 1f3f1bd5abd07..b14bb0985893e 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow (14.0.2-1) unstable; urgency=low + + * New upstream release. + + -- Raúl Cumplido Tue, 12 Dec 2023 09:31:43 -0000 + apache-arrow (14.0.1-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index 87e05558e8cda..44421ce0ea1e4 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -864,6 +864,9 @@ Documentation for Apache Parquet GLib. %{_datadir}/gtk-doc/html/parquet-glib/ %changelog +* Tue Dec 12 2023 Raúl Cumplido - 14.0.2-1 +- New upstream release. + * Mon Nov 06 2023 Raúl Cumplido - 14.0.1-1 - New upstream release. 
From 56991d3efd57e610f5ab604086e19753bd8c834b Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Tue, 19 Dec 2023 10:59:50 -0300 Subject: [PATCH 070/570] GH-39292 [C++][FS]: Remove the AzureBackend enum and add more flexible connection options (#39293) ### Rationale for this change It's good to avoid mentioning the specific test environment in the implementation code. ### What changes are included in this PR? - Removal of the enum - Removal of the `AzureOptions::backend` class member - Addition of more options to `AzureOptions` - Removal of some private string members of `AzureOptions` -- the URLs are built on-the-fly when the clients are instantiated now ### Are these changes tested? Yes. ### Are there any user-facing changes? Changes to the public interface (`azurefs.h`) that won't affect users because the `AzureFS` implementation is not used yet. * Closes: #39292 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/filesystem/azurefs.cc | 61 ++++++++++++++++-------- cpp/src/arrow/filesystem/azurefs.h | 51 +++++++++++++------- cpp/src/arrow/filesystem/azurefs_test.cc | 21 ++++++-- 3 files changed, 91 insertions(+), 42 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index dd267aac36d35..1aa3e86a6f926 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -51,10 +51,12 @@ AzureOptions::~AzureOptions() = default; bool AzureOptions::Equals(const AzureOptions& other) const { // TODO(GH-38598): update here when more auth methods are added. - const bool equals = backend == other.backend && + const bool equals = blob_storage_authority == other.blob_storage_authority && + dfs_storage_authority == other.dfs_storage_authority && + blob_storage_scheme == other.blob_storage_scheme && + dfs_storage_scheme == other.dfs_storage_scheme && default_metadata == other.default_metadata && - account_blob_url_ == other.account_blob_url_ && - account_dfs_url_ == other.account_dfs_url_ && + account_name_ == other.account_name_ && credential_kind_ == other.credential_kind_; if (!equals) { return false; @@ -72,42 +74,59 @@ bool AzureOptions::Equals(const AzureOptions& other) const { return false; } -void AzureOptions::SetUrlsForAccountName(const std::string& account_name) { - if (this->backend == AzureBackend::kAzurite) { - account_blob_url_ = "http://127.0.0.1:10000/" + account_name + "/"; - account_dfs_url_ = "http://127.0.0.1:10000/" + account_name + "/"; - } else { - account_dfs_url_ = "https://" + account_name + ".dfs.core.windows.net/"; - account_blob_url_ = "https://" + account_name + ".blob.core.windows.net/"; +namespace { +std::string BuildBaseUrl(const std::string& scheme, const std::string& authority, + const std::string& account_name) { + std::string url; + url += scheme + "://"; + if (!authority.empty()) { + if (authority[0] == '.') { + url += account_name; + url += authority; + } else { + url += authority; + url += "/"; + url += account_name; + } } + url += "/"; + return url; } +} // namespace -Status AzureOptions::ConfigureDefaultCredential(const std::string& account_name) { - AzureOptions::SetUrlsForAccountName(account_name); - credential_kind_ = CredentialKind::kTokenCredential; - token_credential_ = std::make_shared(); - return Status::OK(); +std::string AzureOptions::AccountBlobUrl(const std::string& account_name) const { + return BuildBaseUrl(blob_storage_scheme, blob_storage_authority, account_name); +} + +std::string AzureOptions::AccountDfsUrl(const 
std::string& account_name) const { + return BuildBaseUrl(dfs_storage_scheme, dfs_storage_authority, account_name); } Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name, const std::string& account_key) { - AzureOptions::SetUrlsForAccountName(account_name); credential_kind_ = CredentialKind::kStorageSharedKeyCredential; + account_name_ = account_name; storage_shared_key_credential_ = std::make_shared(account_name, account_key); return Status::OK(); } +Status AzureOptions::ConfigureDefaultCredential(const std::string& account_name) { + credential_kind_ = CredentialKind::kTokenCredential; + token_credential_ = std::make_shared(); + return Status::OK(); +} + Result> AzureOptions::MakeBlobServiceClient() const { switch (credential_kind_) { case CredentialKind::kAnonymous: break; case CredentialKind::kTokenCredential: - return std::make_unique(account_blob_url_, + return std::make_unique(AccountBlobUrl(account_name_), token_credential_); case CredentialKind::kStorageSharedKeyCredential: - return std::make_unique(account_blob_url_, + return std::make_unique(AccountBlobUrl(account_name_), storage_shared_key_credential_); } return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); @@ -119,11 +138,11 @@ AzureOptions::MakeDataLakeServiceClient() const { case CredentialKind::kAnonymous: break; case CredentialKind::kTokenCredential: - return std::make_unique(account_dfs_url_, - token_credential_); + return std::make_unique( + AccountDfsUrl(account_name_), token_credential_); case CredentialKind::kStorageSharedKeyCredential: return std::make_unique( - account_dfs_url_, storage_shared_key_credential_); + AccountDfsUrl(account_name_), storage_shared_key_credential_); } return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); } diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index b2c7010ff3758..35c140b1097c7 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -43,17 +43,37 @@ class DataLakeServiceClient; namespace arrow::fs { -enum class AzureBackend { - /// \brief Official Azure Remote Backend - kAzure, - /// \brief Local Simulated Storage - kAzurite -}; - /// Options for the AzureFileSystem implementation. struct ARROW_EXPORT AzureOptions { - /// \brief The backend to connect to: Azure or Azurite (for testing). - AzureBackend backend = AzureBackend::kAzure; + /// \brief hostname[:port] of the Azure Blob Storage Service. + /// + /// If the hostname is a relative domain name (one that starts with a '.'), then storage + /// account URLs will be constructed by prepending the account name to the hostname. + /// If the hostname is a fully qualified domain name, then the hostname will be used + /// as-is and the account name will follow the hostname in the URL path. + /// + /// Default: ".blob.core.windows.net" + std::string blob_storage_authority = ".blob.core.windows.net"; + + /// \brief hostname[:port] of the Azure Data Lake Storage Gen 2 Service. + /// + /// If the hostname is a relative domain name (one that starts with a '.'), then storage + /// account URLs will be constructed by prepending the account name to the hostname. + /// If the hostname is a fully qualified domain name, then the hostname will be used + /// as-is and the account name will follow the hostname in the URL path. + /// + /// Default: ".dfs.core.windows.net" + std::string dfs_storage_authority = ".dfs.core.windows.net"; + + /// \brief Azure Blob Storage connection transport. 
+ /// + /// Default: "https" + std::string blob_storage_scheme = "https"; + + /// \brief Azure Data Lake Storage Gen 2 connection transport. + /// + /// Default: "https" + std::string dfs_storage_scheme = "https"; // TODO(GH-38598): Add support for more auth methods. // std::string connection_string; @@ -65,22 +85,17 @@ struct ARROW_EXPORT AzureOptions { std::shared_ptr default_metadata; private: - std::string account_blob_url_; - std::string account_dfs_url_; - enum class CredentialKind { kAnonymous, kTokenCredential, kStorageSharedKeyCredential, } credential_kind_ = CredentialKind::kAnonymous; + std::string account_name_; + std::shared_ptr token_credential_; std::shared_ptr storage_shared_key_credential_; - std::shared_ptr token_credential_; - - void SetUrlsForAccountName(const std::string& account_name); - public: AzureOptions(); ~AzureOptions(); @@ -92,8 +107,8 @@ struct ARROW_EXPORT AzureOptions { bool Equals(const AzureOptions& other) const; - const std::string& AccountBlobUrl() const { return account_blob_url_; } - const std::string& AccountDfsUrl() const { return account_dfs_url_; } + std::string AccountBlobUrl(const std::string& account_name) const; + std::string AccountDfsUrl(const std::string& account_name) const; Result> MakeBlobServiceClient() const; diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 799f3992a2210..8a39c4c554897 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -73,6 +73,13 @@ namespace Blobs = Azure::Storage::Blobs; namespace Core = Azure::Core; namespace DataLake = Azure::Storage::Files::DataLake; +enum class AzureBackend { + /// \brief Official Azure Remote Backend + kAzure, + /// \brief Local Simulated Storage + kAzurite +}; + class BaseAzureEnv : public ::testing::Environment { protected: std::string account_name_; @@ -265,8 +272,6 @@ class AzureHierarchicalNSEnv : public AzureEnvImpl { TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) { AzureOptions options; - options.backend = AzureBackend::kAzurite; // Irrelevant for this test because it - // doesn't connect to the server. ARROW_EXPECT_OK(options.ConfigureDefaultCredential("dummy-account-name")); EXPECT_OK_AND_ASSIGN(auto default_credential_fs, AzureFileSystem::Make(options)); } @@ -352,7 +357,17 @@ class TestAzureFileSystem : public ::testing::Test { static Result MakeOptions(BaseAzureEnv* env) { AzureOptions options; - options.backend = env->backend(); + switch (env->backend()) { + case AzureBackend::kAzurite: + options.blob_storage_authority = "127.0.0.1:10000"; + options.dfs_storage_authority = "127.0.0.1:10000"; + options.blob_storage_scheme = "http"; + options.dfs_storage_scheme = "http"; + break; + case AzureBackend::kAzure: + // Use the default values + break; + } ARROW_EXPECT_OK( options.ConfigureAccountKeyCredential(env->account_name(), env->account_key())); return options; From b862b164a644a92f8a802954fcad179bf28e020e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JB=20Onofr=C3=A9?= Date: Tue, 19 Dec 2023 17:41:28 +0100 Subject: [PATCH 071/570] GH-39299: [Java] Upgrade to Avro 1.11.3 (#39300) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Upgrade to Avro 1.11.3 to fix CVE-2023-39410 ### What changes are included in this PR? Upgrade to Avro 1.11.3 ### Are these changes tested? Run local tests especially on Avro adapter ### Are there any user-facing changes? 
Not directly * Closes: #39299 Authored-by: JB Onofré Signed-off-by: David Li --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index f6dcfadb81b1e..75e0946f10811 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -39,7 +39,7 @@ 2.16.0 2.7.1 23.5.26 - 1.10.0 + 1.11.3 2 true From 3e182f2c9d5c710ce809e342f5c8cee547f979d2 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 19 Dec 2023 13:34:53 -0500 Subject: [PATCH 072/570] GH-39013: [Go][Integration] Support cABI import/export of StringView (#39019) ### Rationale for this change The Go implementation should support import/export of the new data types. This will enable integration testing between the C++ and Go implementations. ### What changes are included in this PR? Added import/export for the new data types and arrays of data of those types. ### Are these changes tested? Yes, they will be covered by the integration tests and existing Go unit tests. ### Are there any user-facing changes? This is a user facing change * Closes: #39013 Lead-authored-by: Benjamin Kietzman Co-authored-by: Matt Topol Co-authored-by: Felipe Oliveira Carvalho Signed-off-by: Matt Topol --- dev/archery/archery/integration/datagen.py | 93 +++++++++- dev/archery/archery/integration/runner.py | 2 + docs/source/format/Integration.rst | 23 ++- go/arrow/array/encoded.go | 6 +- go/arrow/array/list.go | 146 ++-------------- go/arrow/avro/reader_types.go | 2 +- go/arrow/bitutil/endian_default.go | 1 + go/arrow/bitutil/endian_s390x.go | 2 +- go/arrow/cdata/cdata.go | 90 ++++++++++ go/arrow/cdata/cdata_exports.go | 54 ++++-- go/arrow/compute/arithmetic.go | 4 +- go/arrow/compute/arithmetic_test.go | 36 ++-- go/arrow/compute/exec/utils.go | 140 +-------------- go/arrow/compute/exec/utils_test.go | 2 +- go/arrow/compute/fieldref.go | 51 +++--- .../internal/kernels/base_arithmetic.go | 12 +- .../internal/kernels/base_arithmetic_amd64.go | 55 +++--- .../kernels/basic_arithmetic_noasm.go | 3 +- .../compute/internal/kernels/boolean_cast.go | 2 +- go/arrow/compute/internal/kernels/helpers.go | 42 ++--- .../compute/internal/kernels/numeric_cast.go | 22 +-- .../internal/kernels/scalar_arithmetic.go | 6 +- .../kernels/scalar_comparison_amd64.go | 5 +- .../kernels/scalar_comparison_noasm.go | 4 +- .../internal/kernels/scalar_comparisons.go | 30 ++-- .../compute/internal/kernels/string_casts.go | 10 +- .../compute/internal/kernels/vector_hash.go | 2 +- .../internal/kernels/vector_run_end_encode.go | 24 +-- .../internal/kernels/vector_selection.go | 28 +-- go/arrow/compute/scalar_compare_test.go | 16 +- go/arrow/compute/vector_hash_test.go | 23 ++- go/arrow/compute/vector_selection_test.go | 30 ++-- go/arrow/flight/doc.go | 1 - go/arrow/flight/server.go | 2 +- go/arrow/internal/arrjson/arrjson.go | 13 +- go/arrow/internal/arrjson/arrjson_test.go | 32 ++-- go/arrow/internal/testing/tools/bits.go | 2 +- go/arrow/internal/utils.go | 12 ++ go/arrow/ipc/file_reader.go | 30 ++-- go/arrow/ipc/writer.go | 158 +++++++++-------- go/arrow/memory/util.go | 8 + go/arrow/type_traits.go | 162 ++++++++++++++++++ go/arrow/type_traits_decimal128.go | 9 +- go/arrow/type_traits_decimal256.go | 9 +- go/arrow/type_traits_float16.go | 9 +- go/arrow/type_traits_interval.go | 25 +-- go/arrow/type_traits_numeric.gen.go | 121 ++++--------- go/arrow/type_traits_numeric.gen.go.tmpl | 9 +- go/arrow/type_traits_timestamp.go | 9 +- go/arrow/type_traits_view.go | 9 +- go/internal/bitutils/bit_set_run_reader.go | 6 +- go/internal/utils/math.go | 22 +-- 
go/parquet/file/column_reader.go | 2 +- go/parquet/file/column_reader_test.go | 2 +- go/parquet/file/level_conversion.go | 2 +- .../internal/encoding/boolean_decoder.go | 6 +- .../internal/encoding/byte_array_decoder.go | 2 +- go/parquet/internal/encoding/decoder.go | 2 +- .../internal/encoding/delta_bit_packing.go | 4 +- .../internal/encoding/delta_byte_array.go | 2 +- .../encoding/delta_length_byte_array.go | 2 +- .../encoding/fixed_len_byte_array_decoder.go | 2 +- .../encoding/plain_encoder_types.gen.go | 10 +- .../encoding/plain_encoder_types.gen.go.tmpl | 2 +- .../internal/encoding/typed_encoder.gen.go | 28 +-- .../encoding/typed_encoder.gen.go.tmpl | 4 +- go/parquet/internal/encoding/types.go | 4 +- go/parquet/internal/testutils/pagebuilder.go | 8 +- go/parquet/internal/utils/bit_reader.go | 4 +- go/parquet/internal/utils/rle.go | 6 +- .../internal/utils/typed_rle_dict.gen.go | 56 +++--- .../internal/utils/typed_rle_dict.gen.go.tmpl | 8 +- go/parquet/pqarrow/column_readers.go | 6 +- 73 files changed, 917 insertions(+), 859 deletions(-) create mode 100644 go/arrow/type_traits.go diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 29b203ae130c6..2bbc843836af9 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -927,6 +927,83 @@ class LargeListColumn(_BaseListColumn, _LargeOffsetsMixin): pass +class ListViewField(Field): + + def __init__(self, name, value_field, *, nullable=True, + metadata=None): + super().__init__(name, nullable=nullable, + metadata=metadata) + self.value_field = value_field + + @property + def column_class(self): + return ListViewColumn + + def _get_type(self): + return OrderedDict([ + ('name', 'listview') + ]) + + def _get_children(self): + return [self.value_field.get_json()] + + def generate_column(self, size, name=None): + MAX_LIST_SIZE = 4 + VALUES_SIZE = size * MAX_LIST_SIZE + + is_valid = self._make_is_valid(size) + + MAX_OFFSET = VALUES_SIZE - MAX_LIST_SIZE + offsets = np.random.randint(0, MAX_OFFSET + 1, size=size) + sizes = np.random.randint(0, MAX_LIST_SIZE + 1, size=size) + + values = self.value_field.generate_column(VALUES_SIZE) + + if name is None: + name = self.name + return self.column_class(name, size, is_valid, offsets, sizes, values) + + +class LargeListViewField(ListViewField): + + @property + def column_class(self): + return LargeListViewColumn + + def _get_type(self): + return OrderedDict([ + ('name', 'largelistview') + ]) + + +class _BaseListViewColumn(Column): + + def __init__(self, name, count, is_valid, offsets, sizes, values): + super().__init__(name, count) + self.is_valid = is_valid + self.offsets = offsets + self.sizes = sizes + self.values = values + + def _get_buffers(self): + return [ + ('VALIDITY', [int(v) for v in self.is_valid]), + ('OFFSET', self._encode_offsets(self.offsets)), + ('SIZE', self._encode_offsets(self.sizes)), + ] + + def _get_children(self): + return [self.values.get_json()] + + +class ListViewColumn(_BaseListViewColumn, _NarrowOffsetsMixin): + pass + + +class LargeListViewColumn(_BaseListViewColumn, _LargeOffsetsMixin): + pass + + class MapField(Field): def __init__(self, name, key_field, item_field, *, nullable=True, @@ -1663,6 +1740,15 @@ def generate_binary_view_case(): return _generate_file("binary_view", fields, batch_sizes) +def generate_list_view_case(): + fields = [ + ListViewField('lv', get_field('item', 'float32')), + LargeListViewField('llv', get_field('item', 'float32')), + ] + batch_sizes = [0, 
7, 256] + return _generate_file("list_view", fields, batch_sizes) + + def generate_nested_large_offsets_case(): fields = [ LargeListField('large_list_nullable', get_field('item', 'int32')), @@ -1847,7 +1933,12 @@ def _temp_path(): generate_binary_view_case() .skip_tester('C#') - .skip_tester('Go') + .skip_tester('Java') + .skip_tester('JS') + .skip_tester('Rust'), + + generate_list_view_case() + .skip_tester('C#') .skip_tester('Java') .skip_tester('JS') .skip_tester('Rust'), diff --git a/dev/archery/archery/integration/runner.py b/dev/archery/archery/integration/runner.py index bab00e6d70d4a..7fadb7e47cf93 100644 --- a/dev/archery/archery/integration/runner.py +++ b/dev/archery/archery/integration/runner.py @@ -193,6 +193,8 @@ def _run_test_cases(self, ``case_runner`` ran against ``test_cases`` """ def case_wrapper(test_case): + if serial: + return case_runner(test_case) with printer.cork(): return case_runner(test_case) diff --git a/docs/source/format/Integration.rst b/docs/source/format/Integration.rst index e1160b287e77c..1a9b1b97f07ee 100644 --- a/docs/source/format/Integration.rst +++ b/docs/source/format/Integration.rst @@ -223,7 +223,7 @@ considered equivalent to ``[]`` (no metadata). Duplicated keys are not forbidden **Type**: :: { - "name" : "null|struct|list|largelist|fixedsizelist|union|int|floatingpoint|utf8|largeutf8|binary|largebinary|fixedsizebinary|bool|decimal|date|time|timestamp|interval|duration|map" + "name" : "null|struct|list|largelist|listview|largelistview|fixedsizelist|union|int|floatingpoint|utf8|largeutf8|binary|largebinary|utf8view|binaryview|fixedsizebinary|bool|decimal|date|time|timestamp|interval|duration|map|runendencoded" } A ``Type`` will have other fields as defined in @@ -446,12 +446,22 @@ or ``DATA``. ``BufferData`` is encoded based on the type of buffer: -* ``VALIDITY``: a JSON array of 1 (valid) and 0 (null). Data for non-nullable +* ``VALIDITY``: a JSON array of 1 (valid) and 0 (null). Data for non-nullable ``Field`` still has a ``VALIDITY`` array, even though all values are 1. * ``OFFSET``: a JSON array of integers for 32-bit offsets or - string-formatted integers for 64-bit offsets -* ``TYPE_ID``: a JSON array of integers -* ``DATA``: a JSON array of encoded values + string-formatted integers for 64-bit offsets. +* ``TYPE_ID``: a JSON array of integers. +* ``DATA``: a JSON array of encoded values. +* ``VARIADIC_DATA_BUFFERS``: a JSON array of data buffers represented as + hex encoded strings. +* ``VIEWS``: a JSON array of encoded views, which are JSON objects with: + * ``SIZE``: an integer indicating the size of the view, + * ``INLINED``: an encoded value (this field will be present if ``SIZE`` + is smaller than 12, otherwise the next three fields will be present), + * ``PREFIX_HEX``: the first four bytes of the view encoded as hex, + * ``BUFFER_INDEX``: the index in ``VARIADIC_DATA_BUFFERS`` of the buffer + viewed, + * ``OFFSET``: the offset in the buffer viewed. 
The value encoding for ``DATA`` is different depending on the logical type: @@ -527,6 +537,9 @@ in ``datagen.py``): - Signed indices - Unsigned indices - Nested dictionaries +* Run end encoded +* Binary view and string view +* List view and large list view * Extension Types diff --git a/go/arrow/array/encoded.go b/go/arrow/array/encoded.go index fa5fa7addf34c..8ca1416b92ab3 100644 --- a/go/arrow/array/encoded.go +++ b/go/arrow/array/encoded.go @@ -150,19 +150,19 @@ func (r *RunEndEncoded) LogicalRunEndsArray(mem memory.Allocator) arrow.Array { case *Int16: for _, v := range e.Int16Values()[physOffset : physOffset+physLength] { v -= int16(r.data.offset) - v = int16(utils.MinInt(int(v), r.data.length)) + v = int16(utils.Min(int(v), r.data.length)) bldr.(*Int16Builder).Append(v) } case *Int32: for _, v := range e.Int32Values()[physOffset : physOffset+physLength] { v -= int32(r.data.offset) - v = int32(utils.MinInt(int(v), r.data.length)) + v = int32(utils.Min(int(v), r.data.length)) bldr.(*Int32Builder).Append(v) } case *Int64: for _, v := range e.Int64Values()[physOffset : physOffset+physLength] { v -= int64(r.data.offset) - v = int64(utils.MinInt(int(v), r.data.length)) + v = int64(utils.Min(int(v), r.data.length)) bldr.(*Int64Builder).Append(v) } } diff --git a/go/arrow/array/list.go b/go/arrow/array/list.go index 4b62734116797..9d959b5e43b78 100644 --- a/go/arrow/array/list.go +++ b/go/arrow/array/list.go @@ -19,7 +19,6 @@ package array import ( "bytes" "fmt" - "math" "strings" "sync/atomic" @@ -1411,118 +1410,19 @@ func (b *baseListViewBuilder) UnmarshalJSON(data []byte) error { return b.Unmarshal(dec) } -// Pre-conditions: -// -// input.DataType() is ListViewType -// input.Len() > 0 && input.NullN() != input.Len() -func minListViewOffset32(input arrow.ArrayData) int32 { - var bitmap []byte - if input.Buffers()[0] != nil { - bitmap = input.Buffers()[0].Bytes() - } - offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():] - sizes := arrow.Int32Traits.CastFromBytes(input.Buffers()[2].Bytes())[input.Offset():] - - isNull := func(i int) bool { - return bitmap != nil && bitutil.BitIsNotSet(bitmap, input.Offset()+i) - } - - // It's very likely that the first non-null non-empty list-view starts at - // offset 0 of the child array. - i := 0 - for i < input.Len() && (isNull(i) || sizes[i] == 0) { - i += 1 - } - if i >= input.Len() { - return 0 - } - minOffset := offsets[i] - if minOffset == 0 { - // early exit: offset 0 found already - return 0 - } - - // Slow path: scan the buffers entirely. - i += 1 - for ; i < input.Len(); i += 1 { - if isNull(i) { - continue - } - offset := offsets[i] - if offset < minOffset && sizes[i] > 0 { - minOffset = offset - } - } - return minOffset -} - -// Find the maximum offset+size in a LIST_VIEW array. +// Find the minimum offset+size in a LIST_VIEW/LARGE_LIST_VIEW array. 
// // Pre-conditions: // -// input.DataType() is ListViewType -// input.Len() > 0 && input.NullN() != input.Len() -func maxListViewOffset32(input arrow.ArrayData) int { - inputOffset := input.Offset() - var bitmap []byte - if input.Buffers()[0] != nil { - bitmap = input.Buffers()[0].Bytes() - } - offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[inputOffset:] - sizes := arrow.Int32Traits.CastFromBytes(input.Buffers()[2].Bytes())[inputOffset:] - - isNull := func(i int) bool { - return bitmap != nil && bitutil.BitIsNotSet(bitmap, inputOffset+i) - } - - i := input.Len() - 1 // safe because input.Len() > 0 - for i != 0 && (isNull(i) || sizes[i] == 0) { - i -= 1 - } - offset := offsets[i] - size := sizes[i] - if i == 0 { - if isNull(i) || sizes[i] == 0 { - return 0 - } else { - return int(offset + size) - } - } - - values := input.Children()[0] - maxEnd := int(offsets[i] + sizes[i]) - if maxEnd == values.Len() { - // Early-exit: maximum possible view-end found already. - return maxEnd - } - - // Slow path: scan the buffers entirely. - for ; i >= 0; i -= 1 { - offset := offsets[i] - size := sizes[i] - if size > 0 && !isNull(i) { - if int(offset+size) > maxEnd { - maxEnd = int(offset + size) - if maxEnd == values.Len() { - return maxEnd - } - } - } - } - return maxEnd -} - -// Pre-conditions: -// -// input.DataType() is LargeListViewType +// input.DataType() is ListViewType if Offset=int32 or LargeListViewType if Offset=int64 // input.Len() > 0 && input.NullN() != input.Len() -func minLargeListViewOffset64(input arrow.ArrayData) int64 { +func minListViewOffset[Offset int32 | int64](input arrow.ArrayData) Offset { var bitmap []byte if input.Buffers()[0] != nil { bitmap = input.Buffers()[0].Bytes() } - offsets := arrow.Int64Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():] - sizes := arrow.Int64Traits.CastFromBytes(input.Buffers()[2].Bytes())[input.Offset():] + offsets := arrow.GetData[Offset](input.Buffers()[1].Bytes())[input.Offset():] + sizes := arrow.GetData[Offset](input.Buffers()[2].Bytes())[input.Offset():] isNull := func(i int) bool { return bitmap != nil && bitutil.BitIsNotSet(bitmap, input.Offset()+i) @@ -1557,27 +1457,25 @@ func minLargeListViewOffset64(input arrow.ArrayData) int64 { return minOffset } -// Find the maximum offset+size in a LARGE_LIST_VIEW array. +// Find the maximum offset+size in a LIST_VIEW/LARGE_LIST_VIEW array. // // Pre-conditions: // -// input.DataType() is LargeListViewType +// input.DataType() is ListViewType if Offset=int32 or LargeListViewType if Offset=int64 // input.Len() > 0 && input.NullN() != input.Len() -func maxLargeListViewOffset64(input arrow.ArrayData) int64 { +func maxListViewEnd[Offset int32 | int64](input arrow.ArrayData) Offset { inputOffset := input.Offset() var bitmap []byte if input.Buffers()[0] != nil { bitmap = input.Buffers()[0].Bytes() } - offsets := arrow.Int64Traits.CastFromBytes(input.Buffers()[1].Bytes())[inputOffset:] - sizes := arrow.Int64Traits.CastFromBytes(input.Buffers()[2].Bytes())[inputOffset:] + offsets := arrow.GetData[Offset](input.Buffers()[1].Bytes())[inputOffset:] + sizes := arrow.GetData[Offset](input.Buffers()[2].Bytes())[inputOffset:] isNull := func(i int) bool { return bitmap != nil && bitutil.BitIsNotSet(bitmap, inputOffset+i) } - // It's very likely that the first non-null non-empty list-view starts at - // offset zero, so we check that first and potentially early-return a 0. 
i := input.Len() - 1 // safe because input.Len() > 0 for i != 0 && (isNull(i) || sizes[i] == 0) { i -= 1 @@ -1592,15 +1490,9 @@ func maxLargeListViewOffset64(input arrow.ArrayData) int64 { } } - if offset > math.MaxInt64-size { - // Early-exit: 64-bit overflow detected. This is not possible on a - // valid list-view, but we return the maximum possible value to - // avoid undefined behavior. - return math.MaxInt64 - } values := input.Children()[0] maxEnd := offsets[i] + sizes[i] - if maxEnd == int64(values.Len()) { + if maxEnd == Offset(values.Len()) { // Early-exit: maximum possible view-end found already. return maxEnd } @@ -1611,14 +1503,8 @@ func maxLargeListViewOffset64(input arrow.ArrayData) int64 { size := sizes[i] if size > 0 && !isNull(i) { if offset+size > maxEnd { - if offset > math.MaxInt64-size { - // 64-bit overflow detected. This is not possible on a valid list-view, - // but we saturate maxEnd to the maximum possible value to avoid - // undefined behavior. - return math.MaxInt64 - } maxEnd = offset + size - if maxEnd == int64(values.Len()) { + if maxEnd == Offset(values.Len()) { return maxEnd } } @@ -1634,11 +1520,11 @@ func rangeOfValuesUsed(input arrow.ArrayData) (int, int) { var minOffset, maxEnd int switch input.DataType().(type) { case *arrow.ListViewType: - minOffset = int(minListViewOffset32(input)) - maxEnd = maxListViewOffset32(input) + minOffset = int(minListViewOffset[int32](input)) + maxEnd = int(maxListViewEnd[int32](input)) case *arrow.LargeListViewType: - minOffset = int(minLargeListViewOffset64(input)) - maxEnd = int(maxLargeListViewOffset64(input)) + minOffset = int(minListViewOffset[int64](input)) + maxEnd = int(maxListViewEnd[int64](input)) case *arrow.ListType: offsets := arrow.Int32Traits.CastFromBytes(input.Buffers()[1].Bytes())[input.Offset():] minOffset = int(offsets[0]) diff --git a/go/arrow/avro/reader_types.go b/go/arrow/avro/reader_types.go index 5658c6e587db2..974fea1f14e5a 100644 --- a/go/arrow/avro/reader_types.go +++ b/go/arrow/avro/reader_types.go @@ -22,7 +22,7 @@ import ( "errors" "fmt" "math/big" - + "github.com/apache/arrow/go/v15/arrow" "github.com/apache/arrow/go/v15/arrow/array" "github.com/apache/arrow/go/v15/arrow/decimal128" diff --git a/go/arrow/bitutil/endian_default.go b/go/arrow/bitutil/endian_default.go index 9f5d3cdc7d256..ecbbaa70d04b6 100644 --- a/go/arrow/bitutil/endian_default.go +++ b/go/arrow/bitutil/endian_default.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build !s390x // +build !s390x package bitutil diff --git a/go/arrow/bitutil/endian_s390x.go b/go/arrow/bitutil/endian_s390x.go index a9bba4391280e..e99605f5848fa 100644 --- a/go/arrow/bitutil/endian_s390x.go +++ b/go/arrow/bitutil/endian_s390x.go @@ -18,7 +18,7 @@ package bitutil import ( "math/bits" - "unsafe" + "unsafe" ) var toFromLEFunc = bits.ReverseBytes64 diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go index f9693851d7483..64cc8456e8153 100644 --- a/go/arrow/cdata/cdata.go +++ b/go/arrow/cdata/cdata.go @@ -82,6 +82,8 @@ var formatToSimpleType = map[string]arrow.DataType{ "Z": arrow.BinaryTypes.LargeBinary, "u": arrow.BinaryTypes.String, "U": arrow.BinaryTypes.LargeString, + "vz": arrow.BinaryTypes.BinaryView, + "vu": arrow.BinaryTypes.StringView, "tdD": arrow.FixedWidthTypes.Date32, "tdm": arrow.FixedWidthTypes.Date64, "tts": arrow.FixedWidthTypes.Time32s, @@ -263,6 +265,12 @@ func importSchema(schema *CArrowSchema) (ret arrow.Field, err error) { dt = arrow.ListOfField(childFields[0]) case 'L': // large list dt = arrow.LargeListOfField(childFields[0]) + case 'v': // list view/large list view + if f[2] == 'l' { + dt = arrow.ListViewOfField(childFields[0]) + } else if f[2] == 'L' { + dt = arrow.LargeListViewOfField(childFields[0]) + } case 'w': // fixed size list is w:# where # is the list size. listSize, err := strconv.Atoi(strings.Split(f, ":")[1]) if err != nil { @@ -364,6 +372,16 @@ func (imp *cimporter) doImportChildren() error { if err := imp.children[0].importChild(imp, children[0]); err != nil { return err } + case arrow.LIST_VIEW: // only one child to import + imp.children[0].dt = imp.dt.(*arrow.ListViewType).Elem() + if err := imp.children[0].importChild(imp, children[0]); err != nil { + return err + } + case arrow.LARGE_LIST_VIEW: // only one child to import + imp.children[0].dt = imp.dt.(*arrow.LargeListViewType).Elem() + if err := imp.children[0].importChild(imp, children[0]); err != nil { + return err + } case arrow.FIXED_SIZE_LIST: // only one child to import imp.children[0].dt = imp.dt.(*arrow.FixedSizeListType).Elem() if err := imp.children[0].importChild(imp, children[0]); err != nil { @@ -485,10 +503,18 @@ func (imp *cimporter) doImport() error { return imp.importStringLike(int64(arrow.Int64SizeBytes)) case *arrow.LargeBinaryType: return imp.importStringLike(int64(arrow.Int64SizeBytes)) + case *arrow.StringViewType: + return imp.importBinaryViewLike() + case *arrow.BinaryViewType: + return imp.importBinaryViewLike() case *arrow.ListType: return imp.importListLike() case *arrow.LargeListType: return imp.importListLike() + case *arrow.ListViewType: + return imp.importListViewLike() + case *arrow.LargeListViewType: + return imp.importListViewLike() case *arrow.MapType: return imp.importListLike() case *arrow.FixedSizeListType: @@ -654,6 +680,33 @@ func (imp *cimporter) importStringLike(offsetByteWidth int64) (err error) { return } +func (imp *cimporter) importBinaryViewLike() (err error) { + if err = imp.checkNoChildren(); err != nil { + return + } + + buffers := make([]*memory.Buffer, len(imp.cbuffers)-1) + defer memory.ReleaseBuffers(buffers) + + if buffers[0], err = imp.importNullBitmap(0); err != nil { + return + } + + if buffers[1], err = imp.importFixedSizeBuffer(1, int64(arrow.ViewHeaderSizeBytes)); err != nil { + return + } + + dataBufferSizes := unsafe.Slice((*int64)(unsafe.Pointer(imp.cbuffers[len(buffers)])), len(buffers)-2) + for i, size := range dataBufferSizes { + if buffers[i+2], err = imp.importVariableValuesBuffer(i+2, 1, size); 
err != nil { + return + } + } + + imp.data = array.NewData(imp.dt, int(imp.arr.length), buffers, nil, int(imp.arr.null_count), int(imp.arr.offset)) + return +} + func (imp *cimporter) importListLike() (err error) { if err = imp.checkNumChildren(1); err != nil { return err @@ -683,6 +736,43 @@ func (imp *cimporter) importListLike() (err error) { return } +func (imp *cimporter) importListViewLike() (err error) { + offsetSize := int64(imp.dt.Layout().Buffers[1].ByteWidth) + + if err = imp.checkNumChildren(1); err != nil { + return err + } + + if err = imp.checkNumBuffers(3); err != nil { + return err + } + + var nulls, offsets, sizes *memory.Buffer + if nulls, err = imp.importNullBitmap(0); err != nil { + return + } + if nulls != nil { + defer nulls.Release() + } + + if offsets, err = imp.importFixedSizeBuffer(1, offsetSize); err != nil { + return + } + if offsets != nil { + defer offsets.Release() + } + + if sizes, err = imp.importFixedSizeBuffer(2, offsetSize); err != nil { + return + } + if sizes != nil { + defer sizes.Release() + } + + imp.data = array.NewData(imp.dt, int(imp.arr.length), []*memory.Buffer{nulls, offsets, sizes}, []arrow.ArrayData{imp.children[0].data}, int(imp.arr.null_count), int(imp.arr.offset)) + return +} + func (imp *cimporter) importFixedSizePrimitive() error { if err := imp.checkNoChildren(); err != nil { return err diff --git a/go/arrow/cdata/cdata_exports.go b/go/arrow/cdata/cdata_exports.go index d5fdc0dac15df..9c7c238ffb7b4 100644 --- a/go/arrow/cdata/cdata_exports.go +++ b/go/arrow/cdata/cdata_exports.go @@ -167,6 +167,10 @@ func (exp *schemaExporter) exportFormat(dt arrow.DataType) string { return "u" case *arrow.LargeStringType: return "U" + case *arrow.BinaryViewType: + return "vz" + case *arrow.StringViewType: + return "vu" case *arrow.Date32Type: return "tdD" case *arrow.Date64Type: @@ -228,6 +232,10 @@ func (exp *schemaExporter) exportFormat(dt arrow.DataType) string { return "+l" case *arrow.LargeListType: return "+L" + case *arrow.ListViewType: + return "+vl" + case *arrow.LargeListViewType: + return "+vL" case *arrow.FixedSizeListType: return fmt.Sprintf("+w:%d", dt.Len()) case *arrow.StructType: @@ -328,6 +336,15 @@ func allocateBufferPtrArr(n int) (out []*C.void) { return } +func allocateBufferSizeArr(n int) (out []C.int64_t) { + s := (*reflect.SliceHeader)(unsafe.Pointer(&out)) + s.Data = uintptr(C.calloc(C.size_t(n), C.size_t(unsafe.Sizeof(int64(0))))) + s.Len = n + s.Cap = n + + return +} + func (exp *schemaExporter) finish(out *CArrowSchema) { out.dictionary = nil if exp.dict != nil { @@ -368,15 +385,19 @@ func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { exportField(arrow.Field{Type: arr.DataType()}, outSchema) } - nbuffers := len(arr.Data().Buffers()) - buf_offset := 0 + buffers := arr.Data().Buffers() // Some types don't have validity bitmaps, but we keep them shifted // to make processing easier in other contexts. This means that // we have to adjust when exporting. 
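	// For the view types handled below, the C data interface additionally
	// expects a trailing buffer holding the length of each variadic data
	// buffer as an int64; the has_buffer_sizes_buffer branch further down
	// allocates that sizes array and appends it as the last exported buffer.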
has_validity_bitmap := internal.DefaultHasValidityBitmap(arr.DataType().ID()) - if nbuffers > 0 && !has_validity_bitmap { - nbuffers-- - buf_offset++ + if len(buffers) > 0 && !has_validity_bitmap { + buffers = buffers[1:] + } + nbuffers := len(buffers) + + has_buffer_sizes_buffer := internal.HasBufferSizesBuffer(arr.DataType().ID()) + if has_buffer_sizes_buffer { + nbuffers++ } out.dictionary = nil @@ -387,25 +408,34 @@ func exportArray(arr arrow.Array, out *CArrowArray, outSchema *CArrowSchema) { out.buffers = nil if nbuffers > 0 { - bufs := arr.Data().Buffers() - buffers := allocateBufferPtrArr(nbuffers) - for i, buf := range bufs[buf_offset:] { + cBufs := allocateBufferPtrArr(nbuffers) + for i, buf := range buffers { if buf == nil || buf.Len() == 0 { if i > 0 || !has_validity_bitmap { // apache/arrow#33936: export a dummy buffer to be friendly to // implementations that don't import NULL properly - buffers[i] = (*C.void)(unsafe.Pointer(&C.kGoCdataZeroRegion)) + cBufs[i] = (*C.void)(unsafe.Pointer(&C.kGoCdataZeroRegion)) } else { // null pointer permitted for the validity bitmap // (assuming null count is 0) - buffers[i] = nil + cBufs[i] = nil } continue } - buffers[i] = (*C.void)(unsafe.Pointer(&buf.Bytes()[0])) + cBufs[i] = (*C.void)(unsafe.Pointer(&buf.Bytes()[0])) + } + + if has_buffer_sizes_buffer { + sizes := allocateBufferSizeArr(len(buffers[2:])) + for i, buf := range buffers[2:] { + sizes[i] = C.int64_t(buf.Len()) + } + if len(sizes) > 0 { + cBufs[nbuffers-1] = (*C.void)(unsafe.Pointer(&sizes[0])) + } } - out.buffers = (*unsafe.Pointer)(unsafe.Pointer(&buffers[0])) + out.buffers = (*unsafe.Pointer)(unsafe.Pointer(&cBufs[0])) } arr.Data().Retain() diff --git a/go/arrow/compute/arithmetic.go b/go/arrow/compute/arithmetic.go index 1ee1959b2ddc8..052d79610bcba 100644 --- a/go/arrow/compute/arithmetic.go +++ b/go/arrow/compute/arithmetic.go @@ -678,8 +678,8 @@ func RegisterScalarArithmetic(reg FunctionRegistry) { // the allocated space is for duration (an int64) but we // wrote the time32 - time32 as if the output was time32 // so a quick copy in reverse expands the int32s to int64. 
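			// Iterating from the end means every widened int64 is written into
			// slots whose int32 contents have already been read, so the in-place
			// expansion cannot overwrite values before they are copied.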
- rawData := exec.GetData[int32](out.Buffers[1].Buf) - outData := exec.GetData[int64](out.Buffers[1].Buf) + rawData := arrow.GetData[int32](out.Buffers[1].Buf) + outData := arrow.GetData[int64](out.Buffers[1].Buf) for i := out.Len - 1; i >= 0; i-- { outData[i] = int64(rawData[i]) diff --git a/go/arrow/compute/arithmetic_test.go b/go/arrow/compute/arithmetic_test.go index c9c3f1ceb03e9..34c1bc6d98d65 100644 --- a/go/arrow/compute/arithmetic_test.go +++ b/go/arrow/compute/arithmetic_test.go @@ -195,7 +195,7 @@ func (b *Float16BinaryFuncTestSuite) TestSub() { } } -type BinaryArithmeticSuite[T exec.NumericTypes] struct { +type BinaryArithmeticSuite[T arrow.NumericType] struct { BinaryFuncTestSuite opts compute.ArithmeticOptions @@ -205,7 +205,7 @@ type BinaryArithmeticSuite[T exec.NumericTypes] struct { } func (BinaryArithmeticSuite[T]) DataType() arrow.DataType { - return exec.GetDataType[T]() + return arrow.GetDataType[T]() } func (b *BinaryArithmeticSuite[T]) setNansEqual(val bool) { @@ -564,7 +564,7 @@ func (bs *BinaryFloatingArithmeticSuite[T]) TestLog() { bs.assertBinopErr(compute.Logb, `["-Inf"]`, `[2]`, "logarithm of negative number") } -type BinaryIntegralArithmeticSuite[T exec.IntTypes | exec.UintTypes] struct { +type BinaryIntegralArithmeticSuite[T arrow.IntType | arrow.UintType] struct { BinaryArithmeticSuite[T] } @@ -2412,7 +2412,7 @@ func TestUnaryArithmeticNull(t *testing.T) { } } -type UnaryArithmeticSuite[T exec.NumericTypes, O fnOpts] struct { +type UnaryArithmeticSuite[T arrow.NumericType, O fnOpts] struct { suite.Suite mem *memory.CheckedAllocator @@ -2433,7 +2433,7 @@ func (us *UnaryArithmeticSuite[T, O]) TearDownTest() { } func (*UnaryArithmeticSuite[T, O]) datatype() arrow.DataType { - return exec.GetDataType[T]() + return arrow.GetDataType[T]() } func (us *UnaryArithmeticSuite[T, O]) makeNullScalar() scalar.Scalar { @@ -2532,7 +2532,7 @@ func (us *UnaryArithmeticSuite[T, O]) assertUnaryOpErr(fn unaryArithmeticFunc[O] us.ErrorContains(err, msg) } -type UnaryArithmeticIntegral[T exec.IntTypes | exec.UintTypes] struct { +type UnaryArithmeticIntegral[T arrow.IntType | arrow.UintType] struct { UnaryArithmeticSuite[T, compute.ArithmeticOptions] } @@ -2598,7 +2598,7 @@ func (us *UnaryArithmeticIntegral[T]) TestLog() { } } -type UnaryArithmeticSigned[T exec.IntTypes] struct { +type UnaryArithmeticSigned[T arrow.IntType] struct { UnaryArithmeticIntegral[T] } @@ -2678,7 +2678,7 @@ func (us *UnaryArithmeticSigned[T]) TestNegate() { }) } -type UnaryArithmeticUnsigned[T exec.UintTypes] struct { +type UnaryArithmeticUnsigned[T arrow.UintType] struct { UnaryArithmeticIntegral[T] } @@ -2965,12 +2965,12 @@ func TestUnaryArithmetic(t *testing.T) { suite.Run(t, new(DecimalUnaryArithmeticSuite)) } -type BitwiseArithmeticSuite[T exec.IntTypes | exec.UintTypes] struct { +type BitwiseArithmeticSuite[T arrow.IntType | arrow.UintType] struct { BinaryFuncTestSuite } func (bs *BitwiseArithmeticSuite[T]) datatype() arrow.DataType { - return exec.GetDataType[T]() + return arrow.GetDataType[T]() } // to make it easier to test different widths, tests give bytes which @@ -3061,7 +3061,7 @@ var roundModes = []compute.RoundMode{ compute.RoundHalfToOdd, } -type UnaryRoundSuite[T exec.NumericTypes] struct { +type UnaryRoundSuite[T arrow.NumericType] struct { UnaryArithmeticSuite[T, compute.RoundOptions] } @@ -3073,7 +3073,7 @@ func (us *UnaryRoundSuite[T]) setRoundNDigits(v int64) { us.opts.NDigits = v } -type UnaryRoundToMultipleSuite[T exec.NumericTypes] struct { +type UnaryRoundToMultipleSuite[T 
arrow.NumericType] struct { UnaryArithmeticSuite[T, compute.RoundToMultipleOptions] } @@ -3085,15 +3085,15 @@ func (us *UnaryRoundToMultipleSuite[T]) setRoundMultiple(val float64) { us.opts.Multiple = scalar.NewFloat64Scalar(val) } -type UnaryRoundIntegral[T exec.IntTypes | exec.UintTypes] struct { +type UnaryRoundIntegral[T arrow.IntType | arrow.UintType] struct { UnaryRoundSuite[T] } -type UnaryRoundToMultipleIntegral[T exec.IntTypes | exec.UintTypes] struct { +type UnaryRoundToMultipleIntegral[T arrow.IntType | arrow.UintType] struct { UnaryRoundToMultipleSuite[T] } -type UnaryRoundSigned[T exec.IntTypes] struct { +type UnaryRoundSigned[T arrow.IntType] struct { UnaryRoundIntegral[T] } @@ -3130,7 +3130,7 @@ func (us *UnaryRoundSigned[T]) TestRound() { } } -type UnaryRoundToMultipleSigned[T exec.IntTypes] struct { +type UnaryRoundToMultipleSigned[T arrow.IntType] struct { UnaryRoundToMultipleIntegral[T] } @@ -3164,7 +3164,7 @@ func (us *UnaryRoundToMultipleSigned[T]) TestRoundToMultiple() { } } -type UnaryRoundUnsigned[T exec.UintTypes] struct { +type UnaryRoundUnsigned[T arrow.UintType] struct { UnaryRoundIntegral[T] } @@ -3201,7 +3201,7 @@ func (us *UnaryRoundUnsigned[T]) TestRound() { } } -type UnaryRoundToMultipleUnsigned[T exec.UintTypes] struct { +type UnaryRoundToMultipleUnsigned[T arrow.UintType] struct { UnaryRoundToMultipleIntegral[T] } diff --git a/go/arrow/compute/exec/utils.go b/go/arrow/compute/exec/utils.go index 276e4570ca968..1b5e69a502cfd 100644 --- a/go/arrow/compute/exec/utils.go +++ b/go/arrow/compute/exec/utils.go @@ -21,96 +21,21 @@ package exec import ( "fmt" "math" - "reflect" "sync/atomic" "unsafe" "github.com/apache/arrow/go/v15/arrow" "github.com/apache/arrow/go/v15/arrow/array" "github.com/apache/arrow/go/v15/arrow/bitutil" - "github.com/apache/arrow/go/v15/arrow/decimal128" - "github.com/apache/arrow/go/v15/arrow/decimal256" - "github.com/apache/arrow/go/v15/arrow/float16" "github.com/apache/arrow/go/v15/arrow/memory" "golang.org/x/exp/constraints" "golang.org/x/exp/slices" ) -// IntTypes is a type constraint for raw values represented as signed -// integer types by Arrow. We aren't just using constraints.Signed -// because we don't want to include the raw `int` type here whose size -// changes based on the architecture (int32 on 32-bit architectures and -// int64 on 64-bit architectures). -// -// This will also cover types like MonthInterval or the time types -// as their underlying types are int32 and int64 which will get covered -// by using the ~ -type IntTypes interface { - ~int8 | ~int16 | ~int32 | ~int64 -} - -// UintTypes is a type constraint for raw values represented as unsigned -// integer types by Arrow. We aren't just using constraints.Unsigned -// because we don't want to include the raw `uint` type here whose size -// changes based on the architecture (uint32 on 32-bit architectures and -// uint64 on 64-bit architectures). We also don't want to include uintptr -type UintTypes interface { - ~uint8 | ~uint16 | ~uint32 | ~uint64 -} - -// FloatTypes is a type constraint for raw values for representing -// floating point values in Arrow. This consists of constraints.Float and -// float16.Num -type FloatTypes interface { - float16.Num | constraints.Float -} - -// NumericTypes is a type constraint for just signed/unsigned integers -// and float32/float64. 
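// (The constraints and helpers removed below live on under the arrow package:
// arrow.IntType, arrow.UintType, arrow.FloatType, arrow.NumericType,
// arrow.FixedWidthType, arrow.TemporalType, plus arrow.GetBytes, arrow.GetData,
// arrow.GetDataType and arrow.GetType, which is what the call sites elsewhere
// in this patch switch to. A minimal sketch of the relocated generics, with a
// hypothetical helper and assuming arrow.GetData keeps the []byte -> []T
// signature of the exec.GetData removed below:
//
//	import "github.com/apache/arrow/go/v15/arrow"
//
//	func countValues[T arrow.IntType | arrow.UintType](raw []byte) int {
//		vals := arrow.GetData[T](raw) // reinterpret raw bytes as []T
//		return len(vals)
//	}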
-type NumericTypes interface { - IntTypes | UintTypes | constraints.Float -} - -// DecimalTypes is a type constraint for raw values representing larger -// decimal type values in Arrow, specifically decimal128 and decimal256. -type DecimalTypes interface { - decimal128.Num | decimal256.Num -} - -// FixedWidthTypes is a type constraint for raw values in Arrow that -// can be represented as FixedWidth byte slices. Specifically this is for -// using Go generics to easily re-type a byte slice to a properly-typed -// slice. Booleans are excluded here since they are represented by Arrow -// as a bitmap and thus the buffer can't be just reinterpreted as a []bool -type FixedWidthTypes interface { - IntTypes | UintTypes | - FloatTypes | DecimalTypes | - arrow.DayTimeInterval | arrow.MonthDayNanoInterval -} - -type TemporalTypes interface { - arrow.Date32 | arrow.Date64 | arrow.Time32 | arrow.Time64 | - arrow.Timestamp | arrow.Duration | arrow.DayTimeInterval | - arrow.MonthInterval | arrow.MonthDayNanoInterval -} - -func GetValues[T FixedWidthTypes](data arrow.ArrayData, i int) []T { - if data.Buffers()[i] == nil || data.Buffers()[i].Len() == 0 { - return nil - } - ret := unsafe.Slice((*T)(unsafe.Pointer(&data.Buffers()[i].Bytes()[0])), data.Offset()+data.Len()) - return ret[data.Offset():] -} - -func GetOffsets[T int32 | int64](data arrow.ArrayData, i int) []T { - ret := unsafe.Slice((*T)(unsafe.Pointer(&data.Buffers()[i].Bytes()[0])), data.Offset()+data.Len()+1) - return ret[data.Offset():] -} - // GetSpanValues returns a properly typed slice by reinterpreting // the buffer at index i using unsafe.Slice. This will take into account // the offset of the given ArraySpan. -func GetSpanValues[T FixedWidthTypes](span *ArraySpan, i int) []T { +func GetSpanValues[T arrow.FixedWidthType](span *ArraySpan, i int) []T { if len(span.Buffers[i].Buf) == 0 { return nil } @@ -126,16 +51,6 @@ func GetSpanOffsets[T int32 | int64](span *ArraySpan, i int) []T { return ret[span.Offset:] } -func GetBytes[T FixedWidthTypes](in []T) []byte { - var z T - return unsafe.Slice((*byte)(unsafe.Pointer(&in[0])), len(in)*int(unsafe.Sizeof(z))) -} - -func GetData[T FixedWidthTypes](in []byte) []T { - var z T - return unsafe.Slice((*T)(unsafe.Pointer(&in[0])), len(in)/int(unsafe.Sizeof(z))) -} - func Min[T constraints.Ordered](a, b T) T { if a < b { return a @@ -165,59 +80,22 @@ func OptionsInit[T any](_ *KernelCtx, args KernelInitArgs) (KernelState, error) arrow.ErrInvalid) } -var typMap = map[reflect.Type]arrow.DataType{ - reflect.TypeOf(false): arrow.FixedWidthTypes.Boolean, - reflect.TypeOf(int8(0)): arrow.PrimitiveTypes.Int8, - reflect.TypeOf(int16(0)): arrow.PrimitiveTypes.Int16, - reflect.TypeOf(int32(0)): arrow.PrimitiveTypes.Int32, - reflect.TypeOf(int64(0)): arrow.PrimitiveTypes.Int64, - reflect.TypeOf(uint8(0)): arrow.PrimitiveTypes.Uint8, - reflect.TypeOf(uint16(0)): arrow.PrimitiveTypes.Uint16, - reflect.TypeOf(uint32(0)): arrow.PrimitiveTypes.Uint32, - reflect.TypeOf(uint64(0)): arrow.PrimitiveTypes.Uint64, - reflect.TypeOf(float32(0)): arrow.PrimitiveTypes.Float32, - reflect.TypeOf(float64(0)): arrow.PrimitiveTypes.Float64, - reflect.TypeOf(string("")): arrow.BinaryTypes.String, - reflect.TypeOf(arrow.Date32(0)): arrow.FixedWidthTypes.Date32, - reflect.TypeOf(arrow.Date64(0)): arrow.FixedWidthTypes.Date64, - reflect.TypeOf(true): arrow.FixedWidthTypes.Boolean, - reflect.TypeOf(float16.Num{}): arrow.FixedWidthTypes.Float16, - reflect.TypeOf([]byte{}): arrow.BinaryTypes.Binary, -} - -// GetDataType returns the 
appropriate arrow.DataType for the given type T -// only for non-parametric types. This uses a map and reflection internally -// so don't call this in a tight loop, instead call this once and then use -// a closure with the result. -func GetDataType[T NumericTypes | bool | string | []byte | float16.Num]() arrow.DataType { - var z T - return typMap[reflect.TypeOf(z)] -} - -// GetType returns the appropriate arrow.Type type T, only for non-parametric -// types. This uses a map and reflection internally so don't call this in -// a tight loop, instead call it once and then use a closure with the result. -func GetType[T NumericTypes | bool | string]() arrow.Type { - var z T - return typMap[reflect.TypeOf(z)].ID() -} - -type arrayBuilder[T NumericTypes | bool] interface { +type arrayBuilder[T arrow.NumericType | bool] interface { array.Builder Append(T) AppendValues([]T, []bool) } -func ArrayFromSlice[T NumericTypes | bool](mem memory.Allocator, data []T) arrow.Array { - bldr := array.NewBuilder(mem, typMap[reflect.TypeOf(data).Elem()]).(arrayBuilder[T]) +func ArrayFromSlice[T arrow.NumericType | bool](mem memory.Allocator, data []T) arrow.Array { + bldr := array.NewBuilder(mem, arrow.GetDataType[T]()).(arrayBuilder[T]) defer bldr.Release() bldr.AppendValues(data, nil) return bldr.NewArray() } -func ArrayFromSliceWithValid[T NumericTypes | bool](mem memory.Allocator, data []T, valid []bool) arrow.Array { - bldr := array.NewBuilder(mem, typMap[reflect.TypeOf(data).Elem()]).(arrayBuilder[T]) +func ArrayFromSliceWithValid[T arrow.NumericType | bool](mem memory.Allocator, data []T, valid []bool) arrow.Array { + bldr := array.NewBuilder(mem, arrow.GetDataType[T]()).(arrayBuilder[T]) defer bldr.Release() bldr.AppendValues(data, valid) @@ -323,7 +201,7 @@ func (c *ChunkResolver) Resolve(idx int64) (chunk, index int64) { } type arrayTypes interface { - FixedWidthTypes | TemporalTypes | bool | string | []byte + arrow.FixedWidthType | arrow.TemporalType | bool | string | []byte } type ArrayIter[T arrayTypes] interface { @@ -345,11 +223,11 @@ func (b *BoolIter) Next() (out bool) { return } -type PrimitiveIter[T FixedWidthTypes] struct { +type PrimitiveIter[T arrow.FixedWidthType] struct { Values []T } -func NewPrimitiveIter[T FixedWidthTypes](arr *ArraySpan) ArrayIter[T] { +func NewPrimitiveIter[T arrow.FixedWidthType](arr *ArraySpan) ArrayIter[T] { return &PrimitiveIter[T]{Values: GetSpanValues[T](arr, 1)} } diff --git a/go/arrow/compute/exec/utils_test.go b/go/arrow/compute/exec/utils_test.go index b26e4ff41e79f..345d6dcf3b4c4 100644 --- a/go/arrow/compute/exec/utils_test.go +++ b/go/arrow/compute/exec/utils_test.go @@ -53,7 +53,7 @@ func TestRechunkConsistentArraysTrivial(t *testing.T) { } } -func assertEqual[T exec.NumericTypes](t *testing.T, mem memory.Allocator, arr arrow.Array, data []T) { +func assertEqual[T arrow.NumericType](t *testing.T, mem memory.Allocator, arr arrow.Array, data []T) { exp := exec.ArrayFromSlice(mem, data) defer exp.Release() assert.Truef(t, array.Equal(exp, arr), "expected: %s\ngot: %s", exp, arr) diff --git a/go/arrow/compute/fieldref.go b/go/arrow/compute/fieldref.go index 565ae3bfadbd0..036e1e355ed75 100644 --- a/go/arrow/compute/fieldref.go +++ b/go/arrow/compute/fieldref.go @@ -282,31 +282,31 @@ type refImpl interface { // // Nested fields can be referenced as well, given the schema: // -// arrow.NewSchema([]arrow.Field{ -// {Name: "a", Type: arrow.StructOf(arrow.Field{Name: "n", Type: arrow.Null})}, -// {Name: "b", Type: arrow.PrimitiveTypes.Int32}, -// }) +// 
arrow.NewSchema([]arrow.Field{ +// {Name: "a", Type: arrow.StructOf(arrow.Field{Name: "n", Type: arrow.Null})}, +// {Name: "b", Type: arrow.PrimitiveTypes.Int32}, +// }) // // the following all indicate the nested field named "n": // -// FieldRefPath(FieldPath{0, 0}) -// FieldRefList("a", 0) -// FieldRefList("a", "n") -// FieldRefList(0, "n") -// NewFieldRefFromDotPath(".a[0]") +// FieldRefPath(FieldPath{0, 0}) +// FieldRefList("a", 0) +// FieldRefList("a", "n") +// FieldRefList(0, "n") +// NewFieldRefFromDotPath(".a[0]") // // FieldPaths matching a FieldRef are retrieved with the FindAll* functions // Multiple matches are possible because field names may be duplicated within // a schema. For example: // -// aIsAmbiguous := arrow.NewSchema([]arrow.Field{ -// {Name: "a", Type: arrow.PrimitiveTypes.Int32}, -// {Name: "a", Type: arrow.PrimitiveTypes.Float32}, -// }) -// matches := FieldRefName("a").FindAll(aIsAmbiguous) -// assert.Len(matches, 2) -// assert.True(matches[0].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(0)) -// assert.True(matches[1].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(1)) +// aIsAmbiguous := arrow.NewSchema([]arrow.Field{ +// {Name: "a", Type: arrow.PrimitiveTypes.Int32}, +// {Name: "a", Type: arrow.PrimitiveTypes.Float32}, +// }) +// matches := FieldRefName("a").FindAll(aIsAmbiguous) +// assert.Len(matches, 2) +// assert.True(matches[0].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(0)) +// assert.True(matches[1].Get(aIsAmbiguous).Equals(aIsAmbiguous.Field(1)) type FieldRef struct { impl refImpl } @@ -346,17 +346,18 @@ func FieldRefList(elems ...interface{}) FieldRef { // NewFieldRefFromDotPath parses a dot path into a field ref. // // dot_path = '.' name -// | '[' digit+ ']' -// | dot_path+ +// +// | '[' digit+ ']' +// | dot_path+ // // Examples // -// ".alpha" => FieldRefName("alpha") -// "[2]" => FieldRefIndex(2) -// ".beta[3]" => FieldRefList("beta", 3) -// "[5].gamma.delta[7]" => FieldRefList(5, "gamma", "delta", 7) -// ".hello world" => FieldRefName("hello world") -// `.\[y\]\\tho\.\` => FieldRef(`[y]\tho.\`) +// ".alpha" => FieldRefName("alpha") +// "[2]" => FieldRefIndex(2) +// ".beta[3]" => FieldRefList("beta", 3) +// "[5].gamma.delta[7]" => FieldRefList(5, "gamma", "delta", 7) +// ".hello world" => FieldRefName("hello world") +// `.\[y\]\\tho\.\` => FieldRef(`[y]\tho.\`) // // Note: when parsing a name, a '\' preceding any other character will be // dropped from the resulting name. 
therefore if a name must contain the characters diff --git a/go/arrow/compute/internal/kernels/base_arithmetic.go b/go/arrow/compute/internal/kernels/base_arithmetic.go index 4ef0031f31484..b795c04c39ead 100644 --- a/go/arrow/compute/internal/kernels/base_arithmetic.go +++ b/go/arrow/compute/internal/kernels/base_arithmetic.go @@ -81,7 +81,7 @@ const ( OpLogbChecked ) -func mulWithOverflow[T exec.IntTypes | exec.UintTypes](a, b T) (T, error) { +func mulWithOverflow[T arrow.IntType | arrow.UintType](a, b T) (T, error) { min, max := MinOf[T](), MaxOf[T]() switch { case a > 0: @@ -107,7 +107,7 @@ func mulWithOverflow[T exec.IntTypes | exec.UintTypes](a, b T) (T, error) { return a * b, nil } -func getGoArithmeticBinary[OutT, Arg0T, Arg1T exec.NumericTypes](op func(a Arg0T, b Arg1T, e *error) OutT) binaryOps[OutT, Arg0T, Arg1T] { +func getGoArithmeticBinary[OutT, Arg0T, Arg1T arrow.NumericType](op func(a Arg0T, b Arg1T, e *error) OutT) binaryOps[OutT, Arg0T, Arg1T] { return binaryOps[OutT, Arg0T, Arg1T]{ arrArr: func(_ *exec.KernelCtx, left []Arg0T, right []Arg1T, out []OutT) error { var err error @@ -143,7 +143,7 @@ var ( errLogNeg = fmt.Errorf("%w: logarithm of negative number", arrow.ErrInvalid) ) -func getGoArithmeticOpIntegral[InT, OutT exec.UintTypes | exec.IntTypes](op ArithmeticOp) exec.ArrayKernelExec { +func getGoArithmeticOpIntegral[InT, OutT arrow.UintType | arrow.IntType](op ArithmeticOp) exec.ArrayKernelExec { switch op { case OpAdd: return ScalarBinary(getGoArithmeticBinary(func(a, b InT, _ *error) OutT { return OutT(a + b) })) @@ -178,7 +178,7 @@ func getGoArithmeticOpIntegral[InT, OutT exec.UintTypes | exec.IntTypes](op Arit if SizeOf[InT]() == SizeOf[OutT]() { return ScalarUnary(func(_ *exec.KernelCtx, arg []InT, out []OutT) error { - in, output := exec.GetBytes(arg), exec.GetBytes(out) + in, output := arrow.GetBytes(arg), arrow.GetBytes(out) copy(output, in) return nil }) @@ -314,7 +314,7 @@ func getGoArithmeticOpIntegral[InT, OutT exec.UintTypes | exec.IntTypes](op Arit } if SizeOf[InT]() == SizeOf[OutT]() { return ScalarUnary(func(_ *exec.KernelCtx, arg []InT, out []OutT) error { - in, output := exec.GetBytes(arg), exec.GetBytes(out) + in, output := arrow.GetBytes(arg), arrow.GetBytes(out) copy(output, in) return nil }) @@ -837,7 +837,7 @@ func ArithmeticExecSameType(ty arrow.Type, op ArithmeticOp) exec.ArrayKernelExec return nil } -func arithmeticExec[InT exec.IntTypes | exec.UintTypes](oty arrow.Type, op ArithmeticOp) exec.ArrayKernelExec { +func arithmeticExec[InT arrow.IntType | arrow.UintType](oty arrow.Type, op ArithmeticOp) exec.ArrayKernelExec { switch oty { case arrow.INT8: return getArithmeticOpIntegral[InT, int8](op) diff --git a/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go b/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go index 942b8e4ff5600..51b1866fb68fa 100644 --- a/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go +++ b/go/arrow/compute/internal/kernels/base_arithmetic_amd64.go @@ -21,63 +21,64 @@ package kernels import ( "unsafe" + "github.com/apache/arrow/go/v15/arrow" "github.com/apache/arrow/go/v15/arrow/compute/exec" "github.com/apache/arrow/go/v15/arrow/internal/debug" "golang.org/x/exp/constraints" "golang.org/x/sys/cpu" ) -func getAvx2ArithmeticBinaryNumeric[T exec.NumericTypes](op ArithmeticOp) binaryOps[T, T, T] { - typ := exec.GetType[T]() +func getAvx2ArithmeticBinaryNumeric[T arrow.NumericType](op ArithmeticOp) binaryOps[T, T, T] { + typ := arrow.GetType[T]() return binaryOps[T, T, T]{ arrArr: func(_ 
*exec.KernelCtx, Arg0, Arg1, Out []T) error { - arithmeticAvx2(typ, op, exec.GetBytes(Arg0), exec.GetBytes(Arg1), exec.GetBytes(Out), len(Arg0)) + arithmeticAvx2(typ, op, arrow.GetBytes(Arg0), arrow.GetBytes(Arg1), arrow.GetBytes(Out), len(Arg0)) return nil }, arrScalar: func(_ *exec.KernelCtx, Arg0 []T, Arg1 T, Out []T) error { - arithmeticArrScalarAvx2(typ, op, exec.GetBytes(Arg0), unsafe.Pointer(&Arg1), exec.GetBytes(Out), len(Arg0)) + arithmeticArrScalarAvx2(typ, op, arrow.GetBytes(Arg0), unsafe.Pointer(&Arg1), arrow.GetBytes(Out), len(Arg0)) return nil }, scalarArr: func(_ *exec.KernelCtx, Arg0 T, Arg1, Out []T) error { - arithmeticScalarArrAvx2(typ, op, unsafe.Pointer(&Arg0), exec.GetBytes(Arg1), exec.GetBytes(Out), len(Arg1)) + arithmeticScalarArrAvx2(typ, op, unsafe.Pointer(&Arg0), arrow.GetBytes(Arg1), arrow.GetBytes(Out), len(Arg1)) return nil }, } } -func getSSE4ArithmeticBinaryNumeric[T exec.NumericTypes](op ArithmeticOp) binaryOps[T, T, T] { - typ := exec.GetType[T]() +func getSSE4ArithmeticBinaryNumeric[T arrow.NumericType](op ArithmeticOp) binaryOps[T, T, T] { + typ := arrow.GetType[T]() return binaryOps[T, T, T]{ arrArr: func(_ *exec.KernelCtx, Arg0, Arg1, Out []T) error { - arithmeticSSE4(typ, op, exec.GetBytes(Arg0), exec.GetBytes(Arg1), exec.GetBytes(Out), len(Arg0)) + arithmeticSSE4(typ, op, arrow.GetBytes(Arg0), arrow.GetBytes(Arg1), arrow.GetBytes(Out), len(Arg0)) return nil }, arrScalar: func(_ *exec.KernelCtx, Arg0 []T, Arg1 T, Out []T) error { - arithmeticArrScalarSSE4(typ, op, exec.GetBytes(Arg0), unsafe.Pointer(&Arg1), exec.GetBytes(Out), len(Arg0)) + arithmeticArrScalarSSE4(typ, op, arrow.GetBytes(Arg0), unsafe.Pointer(&Arg1), arrow.GetBytes(Out), len(Arg0)) return nil }, scalarArr: func(_ *exec.KernelCtx, Arg0 T, Arg1, Out []T) error { - arithmeticScalarArrSSE4(typ, op, unsafe.Pointer(&Arg0), exec.GetBytes(Arg1), exec.GetBytes(Out), len(Arg1)) + arithmeticScalarArrSSE4(typ, op, unsafe.Pointer(&Arg0), arrow.GetBytes(Arg1), arrow.GetBytes(Out), len(Arg1)) return nil }, } } -func getArithmeticOpIntegral[InT, OutT exec.UintTypes | exec.IntTypes](op ArithmeticOp) exec.ArrayKernelExec { +func getArithmeticOpIntegral[InT, OutT arrow.UintType | arrow.IntType](op ArithmeticOp) exec.ArrayKernelExec { if cpu.X86.HasAVX2 { switch op { case OpAdd, OpSub, OpMul: return ScalarBinary(getAvx2ArithmeticBinaryNumeric[InT](op)) case OpAbsoluteValue, OpNegate: - typ := exec.GetType[InT]() + typ := arrow.GetType[InT]() return ScalarUnary(func(_ *exec.KernelCtx, arg, out []InT) error { - arithmeticUnaryAvx2(typ, op, exec.GetBytes(arg), exec.GetBytes(out), len(arg)) + arithmeticUnaryAvx2(typ, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg)) return nil }) case OpSign: - inType, outType := exec.GetType[InT](), exec.GetType[OutT]() + inType, outType := arrow.GetType[InT](), arrow.GetType[OutT]() return ScalarUnary(func(_ *exec.KernelCtx, arg []InT, out []OutT) error { - arithmeticUnaryDiffTypesAvx2(inType, outType, op, exec.GetBytes(arg), exec.GetBytes(out), len(arg)) + arithmeticUnaryDiffTypesAvx2(inType, outType, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg)) return nil }) } @@ -86,15 +87,15 @@ func getArithmeticOpIntegral[InT, OutT exec.UintTypes | exec.IntTypes](op Arithm case OpAdd, OpSub, OpMul: return ScalarBinary(getSSE4ArithmeticBinaryNumeric[InT](op)) case OpAbsoluteValue, OpNegate: - typ := exec.GetType[InT]() + typ := arrow.GetType[InT]() return ScalarUnary(func(ctx *exec.KernelCtx, arg, out []InT) error { - arithmeticUnarySSE4(typ, op, exec.GetBytes(arg), 
exec.GetBytes(out), len(arg)) + arithmeticUnarySSE4(typ, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg)) return nil }) case OpSign: - inType, outType := exec.GetType[InT](), exec.GetType[OutT]() + inType, outType := arrow.GetType[InT](), arrow.GetType[OutT]() return ScalarUnary(func(_ *exec.KernelCtx, arg []InT, out []OutT) error { - arithmeticUnaryDiffTypesSSE4(inType, outType, op, exec.GetBytes(arg), exec.GetBytes(out), len(arg)) + arithmeticUnaryDiffTypesSSE4(inType, outType, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg)) return nil }) } @@ -109,38 +110,38 @@ func getArithmeticOpFloating[InT, OutT constraints.Float](op ArithmeticOp) exec. if cpu.X86.HasAVX2 { switch op { case OpAdd, OpSub, OpAddChecked, OpSubChecked, OpMul, OpMulChecked: - if exec.GetType[InT]() != exec.GetType[OutT]() { + if arrow.GetType[InT]() != arrow.GetType[OutT]() { debug.Assert(false, "not implemented") return nil } return ScalarBinary(getAvx2ArithmeticBinaryNumeric[InT](op)) case OpAbsoluteValue, OpAbsoluteValueChecked, OpNegate, OpNegateChecked, OpSign: - if exec.GetType[InT]() != exec.GetType[OutT]() { + if arrow.GetType[InT]() != arrow.GetType[OutT]() { debug.Assert(false, "not implemented") return nil } - typ := exec.GetType[InT]() + typ := arrow.GetType[InT]() return ScalarUnary(func(_ *exec.KernelCtx, arg, out []InT) error { - arithmeticUnaryAvx2(typ, op, exec.GetBytes(arg), exec.GetBytes(out), len(arg)) + arithmeticUnaryAvx2(typ, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg)) return nil }) } } else if cpu.X86.HasSSE42 { switch op { case OpAdd, OpSub, OpAddChecked, OpSubChecked, OpMul, OpMulChecked: - if exec.GetType[InT]() != exec.GetType[OutT]() { + if arrow.GetType[InT]() != arrow.GetType[OutT]() { debug.Assert(false, "not implemented") return nil } return ScalarBinary(getSSE4ArithmeticBinaryNumeric[InT](op)) case OpAbsoluteValue, OpAbsoluteValueChecked, OpNegate, OpNegateChecked, OpSign: - if exec.GetType[InT]() != exec.GetType[OutT]() { + if arrow.GetType[InT]() != arrow.GetType[OutT]() { debug.Assert(false, "not implemented") return nil } - typ := exec.GetType[InT]() + typ := arrow.GetType[InT]() return ScalarUnary(func(_ *exec.KernelCtx, arg, out []InT) error { - arithmeticUnarySSE4(typ, op, exec.GetBytes(arg), exec.GetBytes(out), len(arg)) + arithmeticUnarySSE4(typ, op, arrow.GetBytes(arg), arrow.GetBytes(out), len(arg)) return nil }) } diff --git a/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go b/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go index 8e46ca030c8b7..2c1559fe0f0fd 100644 --- a/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go +++ b/go/arrow/compute/internal/kernels/basic_arithmetic_noasm.go @@ -19,6 +19,7 @@ package kernels import ( + "github.com/apache/arrow/go/v15/arrow" "github.com/apache/arrow/go/v15/arrow/compute/exec" "golang.org/x/exp/constraints" ) @@ -27,6 +28,6 @@ func getArithmeticOpFloating[InT, OutT constraints.Float](op ArithmeticOp) exec. 
return getGoArithmeticOpFloating[InT, OutT](op) } -func getArithmeticOpIntegral[InT, OutT exec.UintTypes | exec.IntTypes](op ArithmeticOp) exec.ArrayKernelExec { +func getArithmeticOpIntegral[InT, OutT arrow.UintType | arrow.IntType](op ArithmeticOp) exec.ArrayKernelExec { return getGoArithmeticOpIntegral[InT, OutT](op) } diff --git a/go/arrow/compute/internal/kernels/boolean_cast.go b/go/arrow/compute/internal/kernels/boolean_cast.go index 923c5b3f54512..6109d25790940 100644 --- a/go/arrow/compute/internal/kernels/boolean_cast.go +++ b/go/arrow/compute/internal/kernels/boolean_cast.go @@ -27,7 +27,7 @@ import ( "github.com/apache/arrow/go/v15/arrow/compute/exec" ) -func isNonZero[T exec.FixedWidthTypes](ctx *exec.KernelCtx, in []T, out []byte) error { +func isNonZero[T arrow.FixedWidthType](ctx *exec.KernelCtx, in []T, out []byte) error { var zero T for i, v := range in { bitutil.SetBitTo(out, i, v != zero) diff --git a/go/arrow/compute/internal/kernels/helpers.go b/go/arrow/compute/internal/kernels/helpers.go index 686c4b3e0c29a..1ac09ba43bfb5 100644 --- a/go/arrow/compute/internal/kernels/helpers.go +++ b/go/arrow/compute/internal/kernels/helpers.go @@ -37,9 +37,9 @@ import ( // which will receive a slice containing the raw input data along with // a slice to populate for the output data. // -// Note that bool is not included in exec.FixedWidthTypes since it is +// Note that bool is not included in arrow.FixedWidthType since it is // represented as a bitmap, not as a slice of bool. -func ScalarUnary[OutT, Arg0T exec.FixedWidthTypes](op func(*exec.KernelCtx, []Arg0T, []OutT) error) exec.ArrayKernelExec { +func ScalarUnary[OutT, Arg0T arrow.FixedWidthType](op func(*exec.KernelCtx, []Arg0T, []OutT) error) exec.ArrayKernelExec { return func(ctx *exec.KernelCtx, in *exec.ExecSpan, out *exec.ExecResult) error { arg0 := in.Values[0].Array inData := exec.GetSpanValues[Arg0T](&arg0, 1) @@ -51,7 +51,7 @@ func ScalarUnary[OutT, Arg0T exec.FixedWidthTypes](op func(*exec.KernelCtx, []Ar // ScalarUnaryNotNull is for generating a kernel to operate only on the // non-null values in the input array. The zerovalue of the output type // is used for any null input values. -func ScalarUnaryNotNull[OutT, Arg0T exec.FixedWidthTypes](op func(*exec.KernelCtx, Arg0T, *error) OutT) exec.ArrayKernelExec { +func ScalarUnaryNotNull[OutT, Arg0T arrow.FixedWidthType](op func(*exec.KernelCtx, Arg0T, *error) OutT) exec.ArrayKernelExec { return func(ctx *exec.KernelCtx, in *exec.ExecSpan, out *exec.ExecResult) error { var ( arg0 = &in.Values[0].Array @@ -78,7 +78,7 @@ func ScalarUnaryNotNull[OutT, Arg0T exec.FixedWidthTypes](op func(*exec.KernelCt // ScalarUnaryBoolOutput is like ScalarUnary only it is for cases of boolean // output. The function should take in a slice of the input type and a slice // of bytes to fill with the output boolean bitmap. -func ScalarUnaryBoolOutput[Arg0T exec.FixedWidthTypes](op func(*exec.KernelCtx, []Arg0T, []byte) error) exec.ArrayKernelExec { +func ScalarUnaryBoolOutput[Arg0T arrow.FixedWidthType](op func(*exec.KernelCtx, []Arg0T, []byte) error) exec.ArrayKernelExec { return func(ctx *exec.KernelCtx, in *exec.ExecSpan, out *exec.ExecResult) error { arg0 := in.Values[0].Array inData := exec.GetSpanValues[Arg0T](&arg0, 1) @@ -127,7 +127,7 @@ func ScalarUnaryNotNullBinaryArgBoolOut[OffsetT int32 | int64](defVal bool, op f // It implements the handling to iterate the offsets and values calling // the provided function on each byte slice. 
The zero value of the OutT // will be used as the output for elements of the input that are null. -func ScalarUnaryNotNullBinaryArg[OutT exec.FixedWidthTypes, OffsetT int32 | int64](op func(*exec.KernelCtx, []byte, *error) OutT) exec.ArrayKernelExec { +func ScalarUnaryNotNullBinaryArg[OutT arrow.FixedWidthType, OffsetT int32 | int64](op func(*exec.KernelCtx, []byte, *error) OutT) exec.ArrayKernelExec { return func(ctx *exec.KernelCtx, in *exec.ExecSpan, out *exec.ExecResult) error { var ( arg0 = &in.Values[0].Array @@ -156,14 +156,14 @@ func ScalarUnaryNotNullBinaryArg[OutT exec.FixedWidthTypes, OffsetT int32 | int6 // ScalarUnaryBoolArg is like ScalarUnary except it specifically expects a // function that takes a byte slice since booleans arrays are represented // as a bitmap. -func ScalarUnaryBoolArg[OutT exec.FixedWidthTypes](op func(*exec.KernelCtx, []byte, []OutT) error) exec.ArrayKernelExec { +func ScalarUnaryBoolArg[OutT arrow.FixedWidthType](op func(*exec.KernelCtx, []byte, []OutT) error) exec.ArrayKernelExec { return func(ctx *exec.KernelCtx, input *exec.ExecSpan, out *exec.ExecResult) error { outData := exec.GetSpanValues[OutT](out, 1) return op(ctx, input.Values[0].Array.Buffers[1].Buf, outData) } } -func UnboxScalar[T exec.FixedWidthTypes](val scalar.PrimitiveScalar) T { +func UnboxScalar[T arrow.FixedWidthType](val scalar.PrimitiveScalar) T { return *(*T)(unsafe.Pointer(&val.Data()[0])) } @@ -174,11 +174,11 @@ func UnboxBinaryScalar(val scalar.BinaryScalar) []byte { return val.Data() } -type arrArrFn[OutT, Arg0T, Arg1T exec.FixedWidthTypes] func(*exec.KernelCtx, []Arg0T, []Arg1T, []OutT) error -type arrScalarFn[OutT, Arg0T, Arg1T exec.FixedWidthTypes] func(*exec.KernelCtx, []Arg0T, Arg1T, []OutT) error -type scalarArrFn[OutT, Arg0T, Arg1T exec.FixedWidthTypes] func(*exec.KernelCtx, Arg0T, []Arg1T, []OutT) error +type arrArrFn[OutT, Arg0T, Arg1T arrow.FixedWidthType] func(*exec.KernelCtx, []Arg0T, []Arg1T, []OutT) error +type arrScalarFn[OutT, Arg0T, Arg1T arrow.FixedWidthType] func(*exec.KernelCtx, []Arg0T, Arg1T, []OutT) error +type scalarArrFn[OutT, Arg0T, Arg1T arrow.FixedWidthType] func(*exec.KernelCtx, Arg0T, []Arg1T, []OutT) error -type binaryOps[OutT, Arg0T, Arg1T exec.FixedWidthTypes] struct { +type binaryOps[OutT, Arg0T, Arg1T arrow.FixedWidthType] struct { arrArr arrArrFn[OutT, Arg0T, Arg1T] arrScalar arrScalarFn[OutT, Arg0T, Arg1T] scalarArr scalarArrFn[OutT, Arg0T, Arg1T] @@ -190,7 +190,7 @@ type binaryBoolOps struct { scalarArr func(ctx *exec.KernelCtx, lhs bool, rhs, out bitutil.Bitmap) error } -func ScalarBinary[OutT, Arg0T, Arg1T exec.FixedWidthTypes](ops binaryOps[OutT, Arg0T, Arg1T]) exec.ArrayKernelExec { +func ScalarBinary[OutT, Arg0T, Arg1T arrow.FixedWidthType](ops binaryOps[OutT, Arg0T, Arg1T]) exec.ArrayKernelExec { arrayArray := func(ctx *exec.KernelCtx, arg0, arg1 *exec.ArraySpan, out *exec.ExecResult) error { var ( a0 = exec.GetSpanValues[Arg0T](arg0, 1) @@ -281,7 +281,7 @@ func ScalarBinaryBools(ops *binaryBoolOps) exec.ArrayKernelExec { } } -func ScalarBinaryNotNull[OutT, Arg0T, Arg1T exec.FixedWidthTypes](op func(*exec.KernelCtx, Arg0T, Arg1T, *error) OutT) exec.ArrayKernelExec { +func ScalarBinaryNotNull[OutT, Arg0T, Arg1T arrow.FixedWidthType](op func(*exec.KernelCtx, Arg0T, Arg1T, *error) OutT) exec.ArrayKernelExec { arrayArray := func(ctx *exec.KernelCtx, arg0, arg1 *exec.ArraySpan, out *exec.ExecResult) (err error) { // fast path if one side is entirely null if arg0.UpdateNullCount() == arg0.Len || arg1.UpdateNullCount() == arg1.Len { @@ 
-379,7 +379,7 @@ func ScalarBinaryNotNull[OutT, Arg0T, Arg1T exec.FixedWidthTypes](op func(*exec. } } -type binaryBinOp[T exec.FixedWidthTypes | bool] func(ctx *exec.KernelCtx, arg0, arg1 []byte) T +type binaryBinOp[T arrow.FixedWidthType | bool] func(ctx *exec.KernelCtx, arg0, arg1 []byte) T func ScalarBinaryBinaryArgsBoolOut(itrFn func(*exec.ArraySpan) exec.ArrayIter[[]byte], op binaryBinOp[bool]) exec.ArrayKernelExec { arrArr := func(ctx *exec.KernelCtx, arg0, arg1 *exec.ArraySpan, out *exec.ExecResult) error { @@ -577,7 +577,7 @@ func intsCanFit(data *exec.ArraySpan, target arrow.Type) error { } } -func intsInRange[T exec.IntTypes | exec.UintTypes](data *exec.ArraySpan, lowerBound, upperBound T) error { +func intsInRange[T arrow.IntType | arrow.UintType](data *exec.ArraySpan, lowerBound, upperBound T) error { if MinOf[T]() >= lowerBound && MaxOf[T]() <= upperBound { return nil } @@ -653,7 +653,7 @@ func intsInRange[T exec.IntTypes | exec.UintTypes](data *exec.ArraySpan, lowerBo } type numeric interface { - exec.IntTypes | exec.UintTypes | constraints.Float + arrow.IntType | arrow.UintType | constraints.Float } func memCpySpan[T numeric](in, out *exec.ArraySpan) { @@ -883,12 +883,12 @@ func (bldr *execBufBuilder) finish() (buf *memory.Buffer) { return } -type bufferBuilder[T exec.FixedWidthTypes] struct { +type bufferBuilder[T arrow.FixedWidthType] struct { execBufBuilder zero T } -func newBufferBuilder[T exec.FixedWidthTypes](mem memory.Allocator) *bufferBuilder[T] { +func newBufferBuilder[T arrow.FixedWidthType](mem memory.Allocator) *bufferBuilder[T] { return &bufferBuilder[T]{ execBufBuilder: execBufBuilder{ mem: mem, @@ -901,11 +901,11 @@ func (b *bufferBuilder[T]) reserve(additional int) { } func (b *bufferBuilder[T]) unsafeAppend(value T) { - b.execBufBuilder.unsafeAppend(exec.GetBytes([]T{value})) + b.execBufBuilder.unsafeAppend(arrow.GetBytes([]T{value})) } func (b *bufferBuilder[T]) unsafeAppendSlice(values []T) { - b.execBufBuilder.unsafeAppend(exec.GetBytes(values)) + b.execBufBuilder.unsafeAppend(arrow.GetBytes(values)) } func (b *bufferBuilder[T]) len() int { return b.sz / int(unsafe.Sizeof(b.zero)) } @@ -914,7 +914,7 @@ func (b *bufferBuilder[T]) cap() int { return cap(b.data) / int(unsafe.Sizeof(b.zero)) } -func checkIndexBoundsImpl[T exec.IntTypes | exec.UintTypes](values *exec.ArraySpan, upperLimit uint64) error { +func checkIndexBoundsImpl[T arrow.IntType | arrow.UintType](values *exec.ArraySpan, upperLimit uint64) error { // for unsigned integers, if the values array is larger // than the maximum index value, then there's no need to bounds check isSigned := !arrow.IsUnsignedInteger(values.Type.ID()) diff --git a/go/arrow/compute/internal/kernels/numeric_cast.go b/go/arrow/compute/internal/kernels/numeric_cast.go index c055552bf7ff5..d31edfdd3087c 100644 --- a/go/arrow/compute/internal/kernels/numeric_cast.go +++ b/go/arrow/compute/internal/kernels/numeric_cast.go @@ -69,13 +69,13 @@ func CastIntegerToFloating(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec. 
return nil } -type decimal[T exec.DecimalTypes] interface { +type decimal[T decimal128.Num | decimal256.Num] interface { Less(T) bool GreaterEqual(T) bool LowBits() uint64 } -func decimalToIntImpl[InT exec.DecimalTypes, OutT exec.IntTypes | exec.UintTypes](allowOverflow bool, min, max InT, v decimal[InT], err *error) OutT { +func decimalToIntImpl[InT decimal128.Num | decimal256.Num, OutT arrow.IntType | arrow.UintType](allowOverflow bool, min, max InT, v decimal[InT], err *error) OutT { if !allowOverflow && (v.Less(min) || v.GreaterEqual(max)) { debug.Log("integer value out of bounds from decimal") *err = fmt.Errorf("%w: integer value out of bounds", arrow.ErrInvalid) @@ -84,7 +84,7 @@ func decimalToIntImpl[InT exec.DecimalTypes, OutT exec.IntTypes | exec.UintTypes return OutT(v.LowBits()) } -func CastDecimal256ToInteger[T exec.IntTypes | exec.UintTypes](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { +func CastDecimal256ToInteger[T arrow.IntType | arrow.UintType](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { var ( opts = ctx.State.(CastState) inputType = batch.Values[0].Type().(*arrow.Decimal256Type) @@ -125,7 +125,7 @@ func CastDecimal256ToInteger[T exec.IntTypes | exec.UintTypes](ctx *exec.KernelC return ex(ctx, batch, out) } -func CastDecimal128ToInteger[T exec.IntTypes | exec.UintTypes](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { +func CastDecimal128ToInteger[T arrow.IntType | arrow.UintType](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { var ( opts = ctx.State.(CastState) inputType = batch.Values[0].Type().(*arrow.Decimal128Type) @@ -166,7 +166,7 @@ func CastDecimal128ToInteger[T exec.IntTypes | exec.UintTypes](ctx *exec.KernelC return ex(ctx, batch, out) } -func integerToDecimal128[T exec.IntTypes | exec.UintTypes](inType arrow.Type, outScale int32) exec.ArrayKernelExec { +func integerToDecimal128[T arrow.IntType | arrow.UintType](inType arrow.Type, outScale int32) exec.ArrayKernelExec { var getDecimal func(v T) decimal128.Num switch inType { case arrow.UINT8, arrow.UINT16, arrow.UINT32, arrow.UINT64: @@ -183,7 +183,7 @@ func integerToDecimal128[T exec.IntTypes | exec.UintTypes](inType arrow.Type, ou }) } -func integerToDecimal256[T exec.IntTypes | exec.UintTypes](inType arrow.Type, outScale int32) exec.ArrayKernelExec { +func integerToDecimal256[T arrow.IntType | arrow.UintType](inType arrow.Type, outScale int32) exec.ArrayKernelExec { var getDecimal func(v T) decimal256.Num switch inType { case arrow.UINT8, arrow.UINT16, arrow.UINT32, arrow.UINT64: @@ -200,7 +200,7 @@ func integerToDecimal256[T exec.IntTypes | exec.UintTypes](inType arrow.Type, ou }) } -func CastIntegerToDecimal[OutT exec.DecimalTypes, Arg0 exec.IntTypes | exec.UintTypes](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { +func CastIntegerToDecimal[OutT decimal128.Num | decimal256.Num, Arg0 arrow.IntType | arrow.UintType](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { var ( precision, scale int32 executor exec.ArrayKernelExec @@ -234,7 +234,7 @@ func CastIntegerToDecimal[OutT exec.DecimalTypes, Arg0 exec.IntTypes | exec.Uint return executor(ctx, batch, out) } -func getCastIntToDecimal[T exec.DecimalTypes](inType arrow.Type) exec.ArrayKernelExec { +func getCastIntToDecimal[T decimal128.Num | decimal256.Num](inType arrow.Type) exec.ArrayKernelExec { switch inType { case arrow.UINT8: return CastIntegerToDecimal[T, uint8] @@ -543,7 +543,7 @@ func boolToNum[T 
numeric](_ *exec.KernelCtx, in []byte, out []T) error { return nil } -func checkFloatTrunc[InT constraints.Float, OutT exec.IntTypes | exec.UintTypes](in, out *exec.ArraySpan) error { +func checkFloatTrunc[InT constraints.Float, OutT arrow.IntType | arrow.UintType](in, out *exec.ArraySpan) error { wasTrunc := func(out OutT, in InT) bool { return InT(out) != in } @@ -665,7 +665,7 @@ func checkIntToFloatTrunc(in *exec.ArraySpan, outType arrow.Type) error { return nil } -func parseStringToNumberImpl[T exec.IntTypes | exec.UintTypes | exec.FloatTypes, OffsetT int32 | int64](parseFn func(string) (T, error)) exec.ArrayKernelExec { +func parseStringToNumberImpl[T arrow.IntType | arrow.UintType | arrow.FloatType, OffsetT int32 | int64](parseFn func(string) (T, error)) exec.ArrayKernelExec { return ScalarUnaryNotNullBinaryArg[T, OffsetT](func(_ *exec.KernelCtx, in []byte, err *error) T { st := *(*string)(unsafe.Pointer(&in)) v, e := parseFn(st) @@ -749,7 +749,7 @@ func addCommonNumberCasts[T numeric](outTy arrow.DataType, kernels []exec.Scalar return kernels } -func GetCastToInteger[T exec.IntTypes | exec.UintTypes](outType arrow.DataType) []exec.ScalarKernel { +func GetCastToInteger[T arrow.IntType | arrow.UintType](outType arrow.DataType) []exec.ScalarKernel { kernels := make([]exec.ScalarKernel, 0) output := exec.NewOutputType(outType) diff --git a/go/arrow/compute/internal/kernels/scalar_arithmetic.go b/go/arrow/compute/internal/kernels/scalar_arithmetic.go index cf17e9fd9548b..f1ed21065e404 100644 --- a/go/arrow/compute/internal/kernels/scalar_arithmetic.go +++ b/go/arrow/compute/internal/kernels/scalar_arithmetic.go @@ -254,7 +254,7 @@ func GetBitwiseBinaryKernels(op BitwiseOp) []exec.ScalarKernel { return append(kernels, NullExecKernel(2)) } -func bitwiseNot[T exec.IntTypes | exec.UintTypes](_ *exec.KernelCtx, arg T, _ *error) T { +func bitwiseNot[T arrow.IntType | arrow.UintType](_ *exec.KernelCtx, arg T, _ *error) T { return ^arg } @@ -290,7 +290,7 @@ const ( ShiftRight ) -func shiftKernelSignedImpl[T exec.IntTypes, Unsigned exec.UintTypes](dir ShiftDir, checked bool) exec.ArrayKernelExec { +func shiftKernelSignedImpl[T arrow.IntType, Unsigned arrow.UintType](dir ShiftDir, checked bool) exec.ArrayKernelExec { errShift := fmt.Errorf("%w: shift amount must be >= 0 and less than precision of type", arrow.ErrInvalid) maxShift := T(8*SizeOf[T]() - 1) @@ -334,7 +334,7 @@ func shiftKernelSignedImpl[T exec.IntTypes, Unsigned exec.UintTypes](dir ShiftDi return nil } -func shiftKernelUnsignedImpl[T exec.UintTypes](dir ShiftDir, checked bool) exec.ArrayKernelExec { +func shiftKernelUnsignedImpl[T arrow.UintType](dir ShiftDir, checked bool) exec.ArrayKernelExec { errShift := fmt.Errorf("%w: shift amount must be >= 0 and less than precision of type", arrow.ErrInvalid) maxShift := T(8 * SizeOf[T]()) diff --git a/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go b/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go index 8e5ce1ab7c1ad..52cd2c31a2aa4 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go +++ b/go/arrow/compute/internal/kernels/scalar_comparison_amd64.go @@ -22,7 +22,6 @@ import ( "unsafe" "github.com/apache/arrow/go/v15/arrow" - "github.com/apache/arrow/go/v15/arrow/compute/exec" "golang.org/x/sys/cpu" ) @@ -32,12 +31,12 @@ type cmpfn func(arrow.Type, []byte, []byte, []byte, int64, int) var comparisonMap map[CompareOperator][3]cmpfn -func genCompareKernel[T exec.NumericTypes](op CompareOperator) *CompareData { +func genCompareKernel[T 
arrow.NumericType](op CompareOperator) *CompareData { if pureGo { return genGoCompareKernel(getCmpOp[T](op)) } - ty := exec.GetType[T]() + ty := arrow.GetType[T]() byteWidth := int(unsafe.Sizeof(T(0))) comparisonFns := comparisonMap[op] return &CompareData{ diff --git a/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go b/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go index c0aef5a04e9b8..b36524baa126b 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go +++ b/go/arrow/compute/internal/kernels/scalar_comparison_noasm.go @@ -18,8 +18,8 @@ package kernels -import "github.com/apache/arrow/go/v15/arrow/compute/exec" +import "github.com/apache/arrow/go/v15/arrow" -func genCompareKernel[T exec.NumericTypes](op CompareOperator) *CompareData { +func genCompareKernel[T arrow.NumericType](op CompareOperator) *CompareData { return genGoCompareKernel(getCmpOp[T](op)) } diff --git a/go/arrow/compute/internal/kernels/scalar_comparisons.go b/go/arrow/compute/internal/kernels/scalar_comparisons.go index 9a7640a8d8a39..29e6db29cb267 100644 --- a/go/arrow/compute/internal/kernels/scalar_comparisons.go +++ b/go/arrow/compute/internal/kernels/scalar_comparisons.go @@ -35,22 +35,22 @@ import ( type binaryKernel func(left, right, out []byte, offset int) -type cmpFn[LeftT, RightT exec.FixedWidthTypes] func([]LeftT, []RightT, []uint32) -type cmpScalarLeft[LeftT, RightT exec.FixedWidthTypes] func(LeftT, []RightT, []uint32) -type cmpScalarRight[LeftT, RightT exec.FixedWidthTypes] func([]LeftT, RightT, []uint32) +type cmpFn[LeftT, RightT arrow.FixedWidthType] func([]LeftT, []RightT, []uint32) +type cmpScalarLeft[LeftT, RightT arrow.FixedWidthType] func(LeftT, []RightT, []uint32) +type cmpScalarRight[LeftT, RightT arrow.FixedWidthType] func([]LeftT, RightT, []uint32) -type cmpOp[T exec.FixedWidthTypes] struct { +type cmpOp[T arrow.FixedWidthType] struct { arrArr cmpFn[T, T] arrScalar cmpScalarRight[T, T] scalarArr cmpScalarLeft[T, T] } -func comparePrimitiveArrayArray[T exec.FixedWidthTypes](op cmpFn[T, T]) binaryKernel { +func comparePrimitiveArrayArray[T arrow.FixedWidthType](op cmpFn[T, T]) binaryKernel { return func(leftBytes, rightBytes, out []byte, offset int) { const batchSize = 32 var ( - left = exec.GetData[T](leftBytes) - right = exec.GetData[T](rightBytes) + left = arrow.GetData[T](leftBytes) + right = arrow.GetData[T](rightBytes) nvals = len(left) nbatches = nvals / batchSize tmpOutput [batchSize]uint32 @@ -83,11 +83,11 @@ func comparePrimitiveArrayArray[T exec.FixedWidthTypes](op cmpFn[T, T]) binaryKe } } -func comparePrimitiveArrayScalar[T exec.FixedWidthTypes](op cmpScalarRight[T, T]) binaryKernel { +func comparePrimitiveArrayScalar[T arrow.FixedWidthType](op cmpScalarRight[T, T]) binaryKernel { return func(leftBytes, rightBytes, out []byte, offset int) { const batchSize = 32 var ( - left = exec.GetData[T](leftBytes) + left = arrow.GetData[T](leftBytes) rightVal = *(*T)(unsafe.Pointer(&rightBytes[0])) nvals = len(left) nbatches = nvals / batchSize @@ -121,12 +121,12 @@ func comparePrimitiveArrayScalar[T exec.FixedWidthTypes](op cmpScalarRight[T, T] } } -func comparePrimitiveScalarArray[T exec.FixedWidthTypes](op cmpScalarLeft[T, T]) binaryKernel { +func comparePrimitiveScalarArray[T arrow.FixedWidthType](op cmpScalarLeft[T, T]) binaryKernel { return func(leftBytes, rightBytes, out []byte, offset int) { const batchSize = 32 var ( leftVal = *(*T)(unsafe.Pointer(&leftBytes[0])) - right = exec.GetData[T](rightBytes) + right = arrow.GetData[T](rightBytes) nvals = 
len(right) nbatches = nvals / batchSize @@ -181,7 +181,7 @@ func getOffsetSpanBytes(span *exec.ArraySpan) []byte { return buf[start : start+(span.Len*byteWidth)] } -func compareKernel[T exec.FixedWidthTypes](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { +func compareKernel[T arrow.FixedWidthType](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { kn := ctx.Kernel.(*exec.ScalarKernel) knData := kn.Data.(CompareFuncData).Funcs() @@ -202,7 +202,7 @@ func compareKernel[T exec.FixedWidthTypes](ctx *exec.KernelCtx, batch *exec.Exec return nil } -func genGoCompareKernel[T exec.FixedWidthTypes](op *cmpOp[T]) *CompareData { +func genGoCompareKernel[T arrow.FixedWidthType](op *cmpOp[T]) *CompareData { return &CompareData{ funcAA: comparePrimitiveArrayArray(op.arrArr), funcAS: comparePrimitiveArrayScalar(op.arrScalar), @@ -376,7 +376,7 @@ func genDecimalCompareKernel[T decimal128.Num | decimal256.Num](op CompareOperat return } -func getCmpOp[T exec.NumericTypes](op CompareOperator) *cmpOp[T] { +func getCmpOp[T arrow.NumericType](op CompareOperator) *cmpOp[T] { switch op { case CmpEQ: return &cmpOp[T]{ @@ -524,7 +524,7 @@ func getBinaryCmp(op CompareOperator) binaryBinOp[bool] { return nil } -func numericCompareKernel[T exec.NumericTypes](ty exec.InputType, op CompareOperator) (kn exec.ScalarKernel) { +func numericCompareKernel[T arrow.NumericType](ty exec.InputType, op CompareOperator) (kn exec.ScalarKernel) { ex := compareKernel[T] kn = exec.NewScalarKernelWithSig(&exec.KernelSignature{ InputTypes: []exec.InputType{ty, ty}, diff --git a/go/arrow/compute/internal/kernels/string_casts.go b/go/arrow/compute/internal/kernels/string_casts.go index 76da901e33f8d..d9cf52320b3aa 100644 --- a/go/arrow/compute/internal/kernels/string_casts.go +++ b/go/arrow/compute/internal/kernels/string_casts.go @@ -116,7 +116,7 @@ func CastBinaryToBinary[InOffsetsT, OutOffsetsT int32 | int64](ctx *exec.KernelC outOffsets := exec.GetSpanOffsets[OutOffsetsT](out, 1) castNumericUnsafe(arrow.INT64, arrow.INT32, - exec.GetBytes(inputOffsets), exec.GetBytes(outOffsets), len(inputOffsets)) + arrow.GetBytes(inputOffsets), arrow.GetBytes(outOffsets), len(inputOffsets)) return nil default: // upcast from int32 -> int64 @@ -127,7 +127,7 @@ func CastBinaryToBinary[InOffsetsT, OutOffsetsT int32 | int64](ctx *exec.KernelC outOffsets := exec.GetSpanOffsets[OutOffsetsT](out, 1) castNumericUnsafe(arrow.INT32, arrow.INT64, - exec.GetBytes(inputOffsets), exec.GetBytes(outOffsets), len(inputOffsets)) + arrow.GetBytes(inputOffsets), arrow.GetBytes(outOffsets), len(inputOffsets)) return nil } } @@ -201,8 +201,8 @@ func GetFsbCastKernels() []exec.ScalarKernel { func float16Formatter(v float16.Num) string { return v.String() } func date32Formatter(v arrow.Date32) string { return v.FormattedString() } func date64Formatter(v arrow.Date64) string { return v.FormattedString() } -func numericFormatterSigned[T exec.IntTypes](v T) string { return strconv.FormatInt(int64(v), 10) } -func numericFormatterUnsigned[T exec.UintTypes](v T) string { return strconv.FormatUint(uint64(v), 10) } +func numericFormatterSigned[T arrow.IntType](v T) string { return strconv.FormatInt(int64(v), 10) } +func numericFormatterUnsigned[T arrow.UintType](v T) string { return strconv.FormatUint(uint64(v), 10) } func float32Formatter(v float32) string { return strconv.FormatFloat(float64(v), 'g', -1, 32) } func float64Formatter(v float64) string { return strconv.FormatFloat(v, 'g', -1, 64) } @@ -247,7 +247,7 @@ func 
timeToStringCastExec[T timeIntrinsic](ctx *exec.KernelCtx, batch *exec.Exec return nil } -func numericToStringCastExec[T exec.IntTypes | exec.UintTypes | exec.FloatTypes](formatter func(T) string) exec.ArrayKernelExec { +func numericToStringCastExec[T arrow.IntType | arrow.UintType | arrow.FloatType](formatter func(T) string) exec.ArrayKernelExec { return func(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error { var ( input = &batch.Values[0].Array diff --git a/go/arrow/compute/internal/kernels/vector_hash.go b/go/arrow/compute/internal/kernels/vector_hash.go index 9401e31cc5b09..f6c9a7f39db93 100644 --- a/go/arrow/compute/internal/kernels/vector_hash.go +++ b/go/arrow/compute/internal/kernels/vector_hash.go @@ -178,7 +178,7 @@ func doAppendFixedSize(action Action, memo hashing.MemoTable, arr *exec.ArraySpa }) } -func doAppendNumeric[T exec.IntTypes | exec.UintTypes | exec.FloatTypes](action Action, memo hashing.MemoTable, arr *exec.ArraySpan) error { +func doAppendNumeric[T arrow.IntType | arrow.UintType | arrow.FloatType](action Action, memo hashing.MemoTable, arr *exec.ArraySpan) error { arrData := exec.GetSpanValues[T](arr, 1) shouldEncodeNulls := action.ShouldEncodeNulls() return bitutils.VisitBitBlocksShort(arr.Buffers[0].Buf, arr.Offset, arr.Len, diff --git a/go/arrow/compute/internal/kernels/vector_run_end_encode.go b/go/arrow/compute/internal/kernels/vector_run_end_encode.go index 076bef1368438..017b9712025b7 100644 --- a/go/arrow/compute/internal/kernels/vector_run_end_encode.go +++ b/go/arrow/compute/internal/kernels/vector_run_end_encode.go @@ -46,18 +46,18 @@ type RunEndsType interface { int16 | int32 | int64 } -func readFixedWidthVal[V exec.FixedWidthTypes](inputValidity, inputValues []byte, offset int64, out *V) bool { +func readFixedWidthVal[V arrow.FixedWidthType](inputValidity, inputValues []byte, offset int64, out *V) bool { sz := int64(unsafe.Sizeof(*out)) *out = *(*V)(unsafe.Pointer(&inputValues[offset*sz])) return bitutil.BitIsSet(inputValidity, int(offset)) } -func writeFixedWidthVal[V exec.FixedWidthTypes](result *exec.ExecResult, offset int64, valid bool, value V) { +func writeFixedWidthVal[V arrow.FixedWidthType](result *exec.ExecResult, offset int64, valid bool, value V) { if len(result.Buffers[0].Buf) != 0 { bitutil.SetBitTo(result.Buffers[0].Buf, int(offset), valid) } - arr := exec.GetData[V](result.Buffers[1].Buf) + arr := arrow.GetData[V](result.Buffers[1].Buf) arr[offset] = value } @@ -73,7 +73,7 @@ func writeBoolVal(result *exec.ExecResult, offset int64, valid bool, value bool) bitutil.SetBitTo(result.Buffers[1].Buf, int(offset), value) } -type runEndEncodeLoopFixedWidth[R RunEndsType, V exec.FixedWidthTypes | bool] struct { +type runEndEncodeLoopFixedWidth[R RunEndsType, V arrow.FixedWidthType | bool] struct { inputLen, inputOffset int64 inputValidity []byte inputValues []byte @@ -84,7 +84,7 @@ type runEndEncodeLoopFixedWidth[R RunEndsType, V exec.FixedWidthTypes | bool] st } func (re *runEndEncodeLoopFixedWidth[R, V]) WriteEncodedRuns(out *exec.ExecResult) int64 { - outputRunEnds := exec.GetData[R](out.Children[0].Buffers[1].Buf) + outputRunEnds := arrow.GetData[R](out.Children[0].Buffers[1].Buf) readOffset := re.inputOffset var currentRun V @@ -155,7 +155,7 @@ func (re *runEndEncodeLoopFixedWidth[R, V]) PreallocOutput(ctx *exec.KernelCtx, valueBuffer = ctx.Allocate(int(numOutput) * bufSpec.ByteWidth) } - reeType := arrow.RunEndEncodedOf(exec.GetDataType[R](), re.valueType) + reeType := arrow.RunEndEncodedOf(arrow.GetDataType[R](), 
re.valueType) out.Release() *out = exec.ExecResult{ @@ -230,7 +230,7 @@ func (re *runEndEncodeFSB[R]) PreallocOutput(ctx *exec.KernelCtx, numOutput int6 } valueBuffer := ctx.Allocate(re.width * int(numOutput)) - reeType := arrow.RunEndEncodedOf(exec.GetDataType[R](), re.valueType) + reeType := arrow.RunEndEncodedOf(arrow.GetDataType[R](), re.valueType) out.Release() *out = exec.ExecResult{ @@ -258,7 +258,7 @@ func (re *runEndEncodeFSB[R]) PreallocOutput(ctx *exec.KernelCtx, numOutput int6 } func (re *runEndEncodeFSB[R]) WriteEncodedRuns(out *exec.ExecResult) int64 { - outputRunEnds := exec.GetData[R](out.Children[0].Buffers[1].Buf) + outputRunEnds := arrow.GetData[R](out.Children[0].Buffers[1].Buf) outputValues := out.Children[1].Buffers[1].Buf readOffset := re.inputOffset @@ -362,7 +362,7 @@ func (re *runEndEncodeLoopBinary[R, O]) PreallocOutput(ctx *exec.KernelCtx, numO valueBuffer := ctx.Allocate(int(re.estimatedValuesLen)) offsetsBuffer := ctx.Allocate(int(numOutput+1) * int(SizeOf[O]())) - reeType := arrow.RunEndEncodedOf(exec.GetDataType[R](), re.valueType) + reeType := arrow.RunEndEncodedOf(arrow.GetDataType[R](), re.valueType) *out = exec.ExecResult{ Type: reeType, Len: re.inputLen, @@ -389,7 +389,7 @@ func (re *runEndEncodeLoopBinary[R, O]) PreallocOutput(ctx *exec.KernelCtx, numO } func (re *runEndEncodeLoopBinary[R, O]) WriteEncodedRuns(out *exec.ExecResult) int64 { - outputRunEnds := exec.GetData[R](out.Children[0].Buffers[1].Buf) + outputRunEnds := arrow.GetData[R](out.Children[0].Buffers[1].Buf) outputOffsets := exec.GetSpanOffsets[O](&out.Children[1], 1) outputValues := out.Children[1].Buffers[2].Buf @@ -443,7 +443,7 @@ func validateRunEndType[R RunEndsType](length int64) error { return nil } -func createEncoder[R RunEndsType, V exec.FixedWidthTypes](input *exec.ArraySpan) *runEndEncodeLoopFixedWidth[R, V] { +func createEncoder[R RunEndsType, V arrow.FixedWidthType](input *exec.ArraySpan) *runEndEncodeLoopFixedWidth[R, V] { return &runEndEncodeLoopFixedWidth[R, V]{ inputLen: input.Len, inputOffset: input.Offset, @@ -539,7 +539,7 @@ func runEndEncodeImpl[R RunEndsType](ctx *exec.KernelCtx, batch *exec.ExecSpan, ) if inputLen == 0 { - reeType := arrow.RunEndEncodedOf(exec.GetDataType[R](), inputArr.Type) + reeType := arrow.RunEndEncodedOf(arrow.GetDataType[R](), inputArr.Type) *out = exec.ExecResult{ Type: reeType, Children: []exec.ArraySpan{ diff --git a/go/arrow/compute/internal/kernels/vector_selection.go b/go/arrow/compute/internal/kernels/vector_selection.go index 714e452325bfd..f08bb4100bf88 100644 --- a/go/arrow/compute/internal/kernels/vector_selection.go +++ b/go/arrow/compute/internal/kernels/vector_selection.go @@ -99,12 +99,12 @@ type builder[T any] interface { UnsafeAppendBoolToBitmap(bool) } -func getTakeIndices[T exec.IntTypes | exec.UintTypes](mem memory.Allocator, filter *exec.ArraySpan, nullSelect NullSelectionBehavior) arrow.ArrayData { +func getTakeIndices[T arrow.IntType | arrow.UintType](mem memory.Allocator, filter *exec.ArraySpan, nullSelect NullSelectionBehavior) arrow.ArrayData { var ( filterData = filter.Buffers[1].Buf haveFilterNulls = filter.MayHaveNulls() filterIsValid = filter.Buffers[0].Buf - idxType = exec.GetDataType[T]() + idxType = arrow.GetDataType[T]() ) if haveFilterNulls && nullSelect == EmitNulls { @@ -394,7 +394,7 @@ func primitiveFilterImpl(wr writeFiltered, values *exec.ArraySpan, filter *exec. 
} } -type filterWriter[T exec.UintTypes] struct { +type filterWriter[T arrow.UintType] struct { outPosition int outOffset int valuesOffset int @@ -519,7 +519,7 @@ func PrimitiveFilter(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecRe return nil } -type primitiveGetter[T exec.IntTypes | bool] interface { +type primitiveGetter[T arrow.IntType | bool] interface { IsValid(int64) bool GetValue(int64) T NullCount() int64 @@ -542,7 +542,7 @@ func (b *boolGetter) GetValue(i int64) bool { func (b *boolGetter) NullCount() int64 { return b.inner.Nulls } func (b *boolGetter) Len() int64 { return b.inner.Len } -type primitiveGetterImpl[T exec.IntTypes] struct { +type primitiveGetterImpl[T arrow.IntType] struct { inner *exec.ArraySpan values []T } @@ -608,7 +608,7 @@ func (c *chunkedBoolGetter) GetValue(i int64) bool { func (c *chunkedBoolGetter) NullCount() int64 { return c.nulls } func (c *chunkedBoolGetter) Len() int64 { return c.len } -type chunkedPrimitiveGetter[T exec.IntTypes] struct { +type chunkedPrimitiveGetter[T arrow.IntType] struct { inner *arrow.Chunked resolver *exec.ChunkResolver nulls int64 @@ -619,7 +619,7 @@ type chunkedPrimitiveGetter[T exec.IntTypes] struct { valuesOffset []int64 } -func newChunkedPrimitiveGetter[T exec.IntTypes](arr *arrow.Chunked) *chunkedPrimitiveGetter[T] { +func newChunkedPrimitiveGetter[T arrow.IntType](arr *arrow.Chunked) *chunkedPrimitiveGetter[T] { nchunks := len(arr.Chunks()) lengths := make([]int64, nchunks) valuesData := make([][]T, nchunks) @@ -630,7 +630,7 @@ func newChunkedPrimitiveGetter[T exec.IntTypes](arr *arrow.Chunked) *chunkedPrim lengths[i] = int64(c.Len()) valuesOffset[i] = int64(c.Data().Offset()) valuesIsValid[i] = c.NullBitmapBytes() - valuesData[i] = exec.GetValues[T](c.Data(), 1) + valuesData[i] = arrow.GetValues[T](c.Data(), 1) } return &chunkedPrimitiveGetter[T]{ @@ -662,7 +662,7 @@ func (c *chunkedPrimitiveGetter[T]) GetValue(i int64) T { func (c *chunkedPrimitiveGetter[T]) NullCount() int64 { return c.nulls } func (c *chunkedPrimitiveGetter[T]) Len() int64 { return c.len } -func primitiveTakeImpl[IdxT exec.UintTypes, ValT exec.IntTypes](values primitiveGetter[ValT], indices *exec.ArraySpan, out *exec.ExecResult) { +func primitiveTakeImpl[IdxT arrow.UintType, ValT arrow.IntType](values primitiveGetter[ValT], indices *exec.ArraySpan, out *exec.ExecResult) { var ( indicesData = exec.GetSpanValues[IdxT](indices, 1) indicesIsValid = indices.Buffers[0].Buf @@ -747,7 +747,7 @@ func primitiveTakeImpl[IdxT exec.UintTypes, ValT exec.IntTypes](values primitive out.Nulls = out.Len - validCount } -func booleanTakeImpl[IdxT exec.UintTypes](values primitiveGetter[bool], indices *exec.ArraySpan, out *exec.ExecResult) { +func booleanTakeImpl[IdxT arrow.UintType](values primitiveGetter[bool], indices *exec.ArraySpan, out *exec.ExecResult) { var ( indicesData = exec.GetSpanValues[IdxT](indices, 1) indicesIsValid = indices.Buffers[0].Buf @@ -876,7 +876,7 @@ func booleanTakeDispatch(values, indices *exec.ArraySpan, out *exec.ExecResult) return nil } -func takeIdxChunkedDispatch[ValT exec.IntTypes](values, indices *arrow.Chunked, out []*exec.ExecResult) error { +func takeIdxChunkedDispatch[ValT arrow.IntType](values, indices *arrow.Chunked, out []*exec.ExecResult) error { getter := newChunkedPrimitiveGetter[ValT](values) var fn func(primitiveGetter[ValT], *exec.ArraySpan, *exec.ExecResult) @@ -901,7 +901,7 @@ func takeIdxChunkedDispatch[ValT exec.IntTypes](values, indices *arrow.Chunked, return nil } -func takeIdxDispatch[ValT 
exec.IntTypes](values, indices *exec.ArraySpan, out *exec.ExecResult) error { +func takeIdxDispatch[ValT arrow.IntType](values, indices *exec.ArraySpan, out *exec.ExecResult) error { getter := &primitiveGetterImpl[ValT]{inner: values, values: exec.GetSpanValues[ValT](values, 1)} switch indices.Type.(arrow.FixedWidthDataType).Bytes() { @@ -1368,7 +1368,7 @@ func binaryFilterImpl[OffsetT int32 | int64](ctx *exec.KernelCtx, values, filter return nil } -func takeExecImpl[T exec.UintTypes](ctx *exec.KernelCtx, outputLen int64, values, indices *exec.ArraySpan, out *exec.ExecResult, visitValid func(int64) error, visitNull func() error) error { +func takeExecImpl[T arrow.UintType](ctx *exec.KernelCtx, outputLen int64, values, indices *exec.ArraySpan, out *exec.ExecResult, visitValid func(int64) error, visitNull func() error) error { var ( validityBuilder = validityBuilder{mem: exec.GetAllocator(ctx.Ctx)} indicesValues = exec.GetSpanValues[T](indices, 1) @@ -1600,7 +1600,7 @@ func ListImpl[OffsetT int32 | int64](ctx *exec.KernelCtx, batch *exec.ExecSpan, out.Buffers[1].WrapBuffer(offsetBuilder.finish()) out.Children = make([]exec.ArraySpan, 1) - out.Children[0].Type = exec.GetDataType[OffsetT]() + out.Children[0].Type = arrow.GetDataType[OffsetT]() out.Children[0].Len = int64(childIdxBuilder.len()) out.Children[0].Buffers[1].WrapBuffer(childIdxBuilder.finish()) diff --git a/go/arrow/compute/scalar_compare_test.go b/go/arrow/compute/scalar_compare_test.go index d209f72c800b0..1fa0591692ecb 100644 --- a/go/arrow/compute/scalar_compare_test.go +++ b/go/arrow/compute/scalar_compare_test.go @@ -89,7 +89,7 @@ func (c *CompareSuite) validateCompareScalarArr(op kernels.CompareOperator, dt a c.validateCompareDatum(op, lhs, &compute.ArrayDatum{rhs.Data()}, &compute.ArrayDatum{exp.Data()}) } -func slowCompare[T exec.NumericTypes | string](op kernels.CompareOperator, lhs, rhs T) bool { +func slowCompare[T arrow.NumericType | string](op kernels.CompareOperator, lhs, rhs T) bool { switch op { case kernels.CmpEQ: return lhs == rhs @@ -108,7 +108,7 @@ func slowCompare[T exec.NumericTypes | string](op kernels.CompareOperator, lhs, } } -// func simpleScalarArrayCompare[T exec.NumericTypes](mem memory.Allocator, op kernels.CompareOperator, lhs, rhs compute.Datum) compute.Datum { +// func simpleScalarArrayCompare[T arrow.NumericType](mem memory.Allocator, op kernels.CompareOperator, lhs, rhs compute.Datum) compute.Datum { // var ( // swap = lhs.Kind() == compute.KindArray // span exec.ArraySpan @@ -230,7 +230,7 @@ type valuer[T any] interface { Value(int) T } -func simpleArrArrCompare[T exec.NumericTypes | string](mem memory.Allocator, op kernels.CompareOperator, lhs, rhs compute.Datum) compute.Datum { +func simpleArrArrCompare[T arrow.NumericType | string](mem memory.Allocator, op kernels.CompareOperator, lhs, rhs compute.Datum) compute.Datum { var ( lArr = lhs.(*compute.ArrayDatum).MakeArray() rArr = rhs.(*compute.ArrayDatum).MakeArray() @@ -263,7 +263,7 @@ func simpleArrArrCompare[T exec.NumericTypes | string](mem memory.Allocator, op return compute.NewDatum(result) } -type NumericCompareSuite[T exec.NumericTypes] struct { +type NumericCompareSuite[T arrow.NumericType] struct { CompareSuite } @@ -282,7 +282,7 @@ type NumericCompareSuite[T exec.NumericTypes] struct { // } func (n *NumericCompareSuite[T]) TestSimpleCompareArrayScalar() { - dt := exec.GetDataType[T]() + dt := arrow.GetDataType[T]() one := compute.NewDatum(scalar.MakeScalar(T(1))) n.Run(dt.String(), func() { @@ -361,7 +361,7 @@ func (n 
*NumericCompareSuite[T]) TestSimpleCompareArrayScalar() { } func (n *NumericCompareSuite[T]) TestSimpleCompareScalarArray() { - dt := exec.GetDataType[T]() + dt := arrow.GetDataType[T]() one := compute.NewDatum(scalar.MakeScalar(T(1))) n.Run(dt.String(), func() { @@ -440,7 +440,7 @@ func (n *NumericCompareSuite[T]) TestSimpleCompareScalarArray() { } func (n *NumericCompareSuite[T]) TestNullScalar() { - dt := exec.GetDataType[T]() + dt := arrow.GetDataType[T]() null := compute.NewDatum(scalar.MakeNullScalar(dt)) n.Run(dt.String(), func() { @@ -453,7 +453,7 @@ func (n *NumericCompareSuite[T]) TestNullScalar() { } func (n *NumericCompareSuite[T]) TestSimpleCompareArrArr() { - dt := exec.GetDataType[T]() + dt := arrow.GetDataType[T]() n.Run(dt.String(), func() { n.validateCompare(kernels.CmpEQ, dt, `[]`, `[]`, `[]`) diff --git a/go/arrow/compute/vector_hash_test.go b/go/arrow/compute/vector_hash_test.go index 9410720de7941..c37db584805d0 100644 --- a/go/arrow/compute/vector_hash_test.go +++ b/go/arrow/compute/vector_hash_test.go @@ -26,7 +26,6 @@ import ( "github.com/apache/arrow/go/v15/arrow" "github.com/apache/arrow/go/v15/arrow/array" "github.com/apache/arrow/go/v15/arrow/compute" - "github.com/apache/arrow/go/v15/arrow/compute/exec" "github.com/apache/arrow/go/v15/arrow/decimal128" "github.com/apache/arrow/go/v15/arrow/decimal256" "github.com/apache/arrow/go/v15/arrow/memory" @@ -36,7 +35,7 @@ import ( "golang.org/x/exp/constraints" ) -func checkUniqueDict[I exec.IntTypes | exec.UintTypes](t *testing.T, input compute.ArrayLikeDatum, expected arrow.Array) { +func checkUniqueDict[I arrow.IntType | arrow.UintType](t *testing.T, input compute.ArrayLikeDatum, expected arrow.Array) { out, err := compute.Unique(context.TODO(), input) require.NoError(t, err) defer out.Release() @@ -52,8 +51,8 @@ func checkUniqueDict[I exec.IntTypes | exec.UintTypes](t *testing.T, input compu require.Truef(t, array.Equal(exDict, resultDict), "wanted: %s\ngot: %s", exDict, resultDict) - want := exec.GetValues[I](expected.(*array.Dictionary).Indices().Data(), 1) - got := exec.GetValues[I](result.Indices().Data(), 1) + want := arrow.GetValues[I](expected.(*array.Dictionary).Indices().Data(), 1) + got := arrow.GetValues[I](result.Indices().Data(), 1) assert.ElementsMatchf(t, got, want, "wanted: %s\ngot: %s", want, got) } @@ -81,15 +80,15 @@ func checkDictionaryUnique(t *testing.T, input compute.ArrayLikeDatum, expected } } -func checkUniqueFixedWidth[T exec.FixedWidthTypes](t *testing.T, input, expected arrow.Array) { +func checkUniqueFixedWidth[T arrow.FixedWidthType](t *testing.T, input, expected arrow.Array) { result, err := compute.UniqueArray(context.TODO(), input) require.NoError(t, err) defer result.Release() require.Truef(t, arrow.TypeEqual(result.DataType(), expected.DataType()), "wanted: %s\ngot: %s", expected.DataType(), result.DataType()) - want := exec.GetValues[T](expected.Data(), 1) - got := exec.GetValues[T](expected.Data(), 1) + want := arrow.GetValues[T](expected.Data(), 1) + got := arrow.GetValues[T](expected.Data(), 1) assert.ElementsMatchf(t, got, want, "wanted: %s\ngot: %s", want, got) } @@ -106,7 +105,7 @@ func checkUniqueVariableWidth[OffsetType int32 | int64](t *testing.T, input, exp createSlice := func(v arrow.Array) [][]byte { var ( - offsets = exec.GetOffsets[OffsetType](v.Data(), 1) + offsets = arrow.GetOffsets[OffsetType](v.Data(), 1) data = v.Data().Buffers()[2].Bytes() out = make([][]byte, v.Len()) ) @@ -124,7 +123,7 @@ func checkUniqueVariableWidth[OffsetType int32 | int64](t 
*testing.T, input, exp } type ArrowType interface { - exec.FixedWidthTypes | string | []byte + arrow.FixedWidthType | string | []byte } type builder[T ArrowType] interface { @@ -166,7 +165,7 @@ func checkUniqueFixedSizeBinary(t *testing.T, mem memory.Allocator, dt *arrow.Fi assert.ElementsMatch(t, want, got) } -func checkUniqueFW[T exec.FixedWidthTypes](t *testing.T, mem memory.Allocator, dt arrow.DataType, inValues, outValues []T, inValid, outValid []bool) { +func checkUniqueFW[T arrow.FixedWidthType](t *testing.T, mem memory.Allocator, dt arrow.DataType, inValues, outValues []T, inValid, outValid []bool) { input := makeArray(mem, dt, inValues, inValid) defer input.Release() expected := makeArray(mem, dt, outValues, outValid) @@ -189,7 +188,7 @@ func checkUniqueVW[T string | []byte](t *testing.T, mem memory.Allocator, dt arr } } -type PrimitiveHashKernelSuite[T exec.IntTypes | exec.UintTypes | constraints.Float] struct { +type PrimitiveHashKernelSuite[T arrow.IntType | arrow.UintType | constraints.Float] struct { suite.Suite mem *memory.CheckedAllocator @@ -197,7 +196,7 @@ type PrimitiveHashKernelSuite[T exec.IntTypes | exec.UintTypes | constraints.Flo } func (ps *PrimitiveHashKernelSuite[T]) SetupSuite() { - ps.dt = exec.GetDataType[T]() + ps.dt = arrow.GetDataType[T]() } func (ps *PrimitiveHashKernelSuite[T]) SetupTest() { diff --git a/go/arrow/compute/vector_selection_test.go b/go/arrow/compute/vector_selection_test.go index f44840ba72034..4e38bc995cdfc 100644 --- a/go/arrow/compute/vector_selection_test.go +++ b/go/arrow/compute/vector_selection_test.go @@ -459,9 +459,9 @@ func (f *FilterKernelNumeric) TestFilterNumeric() { }) } -type comparator[T exec.NumericTypes] func(a, b T) bool +type comparator[T arrow.NumericType] func(a, b T) bool -func getComparator[T exec.NumericTypes](op kernels.CompareOperator) comparator[T] { +func getComparator[T arrow.NumericType](op kernels.CompareOperator) comparator[T] { return []comparator[T]{ // EQUAL func(a, b T) bool { return a == b }, @@ -478,7 +478,7 @@ func getComparator[T exec.NumericTypes](op kernels.CompareOperator) comparator[T }[int8(op)] } -func compareAndFilterImpl[T exec.NumericTypes](mem memory.Allocator, data []T, fn func(T) bool) arrow.Array { +func compareAndFilterImpl[T arrow.NumericType](mem memory.Allocator, data []T, fn func(T) bool) arrow.Array { filtered := make([]T, 0, len(data)) for _, v := range data { if fn(v) { @@ -488,12 +488,12 @@ func compareAndFilterImpl[T exec.NumericTypes](mem memory.Allocator, data []T, f return exec.ArrayFromSlice(mem, filtered) } -func compareAndFilterValue[T exec.NumericTypes](mem memory.Allocator, data []T, val T, op kernels.CompareOperator) arrow.Array { +func compareAndFilterValue[T arrow.NumericType](mem memory.Allocator, data []T, val T, op kernels.CompareOperator) arrow.Array { cmp := getComparator[T](op) return compareAndFilterImpl(mem, data, func(e T) bool { return cmp(e, val) }) } -func compareAndFilterSlice[T exec.NumericTypes](mem memory.Allocator, data, other []T, op kernels.CompareOperator) arrow.Array { +func compareAndFilterSlice[T arrow.NumericType](mem memory.Allocator, data, other []T, op kernels.CompareOperator) arrow.Array { cmp := getComparator[T](op) i := 0 return compareAndFilterImpl(mem, data, func(e T) bool { @@ -503,7 +503,7 @@ func compareAndFilterSlice[T exec.NumericTypes](mem memory.Allocator, data, othe }) } -func createFilterImpl[T exec.NumericTypes](mem memory.Allocator, data []T, fn func(T) bool) arrow.Array { +func createFilterImpl[T arrow.NumericType](mem 
memory.Allocator, data []T, fn func(T) bool) arrow.Array { bldr := array.NewBooleanBuilder(mem) defer bldr.Release() for _, v := range data { @@ -512,12 +512,12 @@ func createFilterImpl[T exec.NumericTypes](mem memory.Allocator, data []T, fn fu return bldr.NewArray() } -func createFilterValue[T exec.NumericTypes](mem memory.Allocator, data []T, val T, op kernels.CompareOperator) arrow.Array { +func createFilterValue[T arrow.NumericType](mem memory.Allocator, data []T, val T, op kernels.CompareOperator) arrow.Array { cmp := getComparator[T](op) return createFilterImpl(mem, data, func(e T) bool { return cmp(e, val) }) } -func createFilterSlice[T exec.NumericTypes](mem memory.Allocator, data, other []T, op kernels.CompareOperator) arrow.Array { +func createFilterSlice[T arrow.NumericType](mem memory.Allocator, data, other []T, op kernels.CompareOperator) arrow.Array { cmp := getComparator[T](op) i := 0 return createFilterImpl(mem, data, func(e T) bool { @@ -527,8 +527,8 @@ func createFilterSlice[T exec.NumericTypes](mem memory.Allocator, data, other [] }) } -func compareScalarAndFilterRandomNumeric[T exec.NumericTypes](t *testing.T, mem memory.Allocator) { - dt := exec.GetDataType[T]() +func compareScalarAndFilterRandomNumeric[T arrow.NumericType](t *testing.T, mem memory.Allocator) { + dt := arrow.GetDataType[T]() rng := gen.NewRandomArrayGenerator(randomSeed, mem) t.Run("compare scalar and filter", func(t *testing.T) { @@ -537,7 +537,7 @@ func compareScalarAndFilterRandomNumeric[T exec.NumericTypes](t *testing.T, mem t.Run(fmt.Sprintf("random %d", length), func(t *testing.T) { arr := rng.Numeric(dt.ID(), length, 0, 100, 0) defer arr.Release() - data := exec.GetData[T](arr.Data().Buffers()[1].Bytes()) + data := arrow.GetData[T](arr.Data().Buffers()[1].Bytes()) for _, op := range []kernels.CompareOperator{kernels.CmpEQ, kernels.CmpNE, kernels.CmpGT, kernels.CmpLE} { selection := createFilterValue(mem, data, 50, op) defer selection.Release() @@ -556,8 +556,8 @@ func compareScalarAndFilterRandomNumeric[T exec.NumericTypes](t *testing.T, mem }) } -func compareArrayAndFilterRandomNumeric[T exec.NumericTypes](t *testing.T, mem memory.Allocator) { - dt := exec.GetDataType[T]() +func compareArrayAndFilterRandomNumeric[T arrow.NumericType](t *testing.T, mem memory.Allocator) { + dt := arrow.GetDataType[T]() rng := gen.NewRandomArrayGenerator(randomSeed, mem) t.Run("compare array and filter", func(t *testing.T) { for i := 3; i < 10; i++ { @@ -568,8 +568,8 @@ func compareArrayAndFilterRandomNumeric[T exec.NumericTypes](t *testing.T, mem m rhs := rng.Numeric(dt.ID(), length, 0, 100, 0) defer rhs.Release() - data := exec.GetData[T](lhs.Data().Buffers()[1].Bytes()) - other := exec.GetData[T](rhs.Data().Buffers()[1].Bytes()) + data := arrow.GetData[T](lhs.Data().Buffers()[1].Bytes()) + other := arrow.GetData[T](rhs.Data().Buffers()[1].Bytes()) for _, op := range []kernels.CompareOperator{kernels.CmpEQ, kernels.CmpNE, kernels.CmpGT, kernels.CmpLE} { selection := createFilterSlice(mem, data, other, op) defer selection.Release() diff --git a/go/arrow/flight/doc.go b/go/arrow/flight/doc.go index 68d1ca3458fd4..c36a808b00e4d 100644 --- a/go/arrow/flight/doc.go +++ b/go/arrow/flight/doc.go @@ -74,5 +74,4 @@ // the main thread reset the timer every time a write operation completes successfully // (that means one needs to use to_batches() + write_batch and not write_table). 
- package flight diff --git a/go/arrow/flight/server.go b/go/arrow/flight/server.go index c9c8b390a86d8..3e1da64dcf8c4 100644 --- a/go/arrow/flight/server.go +++ b/go/arrow/flight/server.go @@ -42,7 +42,7 @@ type ( FlightEndpoint = flight.FlightEndpoint Location = flight.Location FlightInfo = flight.FlightInfo - PollInfo = flight.PollInfo + PollInfo = flight.PollInfo FlightData = flight.FlightData PutResult = flight.PutResult Ticket = flight.Ticket diff --git a/go/arrow/internal/arrjson/arrjson.go b/go/arrow/internal/arrjson/arrjson.go index 84dc638983298..49f711cdacd76 100644 --- a/go/arrow/internal/arrjson/arrjson.go +++ b/go/arrow/internal/arrjson/arrjson.go @@ -826,7 +826,8 @@ type Array struct { Offset interface{} `json:"OFFSET,omitempty"` Size interface{} `json:"SIZE,omitempty"` Children []Array `json:"children,omitempty"` - Variadic []string `json:"VARIADIC_BUFFERS,omitempty"` + Variadic []string `json:"VARIADIC_DATA_BUFFERS,omitempty"` + Views []interface{} `json:"VIEWS,omitempty"` } func (a *Array) MarshalJSON() ([]byte, error) { @@ -1090,7 +1091,7 @@ func arrayFromJSON(mem memory.Allocator, dt arrow.DataType, arr Array) arrow.Arr case arrow.BinaryViewDataType: valids := validsToBitmap(validsFromJSON(arr.Valids), mem) nulls := arr.Count - bitutil.CountSetBits(valids.Bytes(), 0, arr.Count) - headers := stringHeadersFromJSON(mem, !dt.IsUtf8(), arr.Data) + headers := stringHeadersFromJSON(mem, !dt.IsUtf8(), arr.Views) extraBufs := variadicBuffersFromJSON(arr.Variadic) defer valids.Release() defer headers.Release() @@ -1513,7 +1514,7 @@ func arrayToJSON(field arrow.Field, arr arrow.Array) Array { Name: field.Name, Count: arr.Len(), Valids: validsToJSON(arr), - Data: stringHeadersToJSON(arr, false), + Views: stringHeadersToJSON(arr, false), Variadic: variadic, } case *array.BinaryView: @@ -1522,7 +1523,7 @@ func arrayToJSON(field arrow.Field, arr arrow.Array) Array { Name: field.Name, Count: arr.Len(), Valids: validsToJSON(arr), - Data: stringHeadersToJSON(arr, true), + Views: stringHeadersToJSON(arr, true), Variadic: variadic, } case *array.List: @@ -2406,7 +2407,7 @@ func stringHeadersFromJSON(mem memory.Allocator, isBinary bool, data []interface } values[i].SetIndexOffset(int32(bufIdx), int32(bufOffset)) - prefix, err := hex.DecodeString(v["PREFIX"].(string)) + prefix, err := hex.DecodeString(v["PREFIX_HEX"].(string)) if err != nil { panic(err) } @@ -2426,7 +2427,7 @@ func stringHeadersFromJSON(mem memory.Allocator, isBinary bool, data []interface func stringHeadersToJSON(arr array.ViewLike, isBinary bool) []interface{} { type StringHeader struct { Size int `json:"SIZE"` - Prefix *string `json:"PREFIX,omitempty"` + Prefix *string `json:"PREFIX_HEX,omitempty"` BufferIdx *int `json:"BUFFER_INDEX,omitempty"` BufferOff *int `json:"OFFSET,omitempty"` Inlined *string `json:"INLINED,omitempty"` diff --git a/go/arrow/internal/arrjson/arrjson_test.go b/go/arrow/internal/arrjson/arrjson_test.go index 31f3cb238ec16..164210cbc230d 100644 --- a/go/arrow/internal/arrjson/arrjson_test.go +++ b/go/arrow/internal/arrjson/arrjson_test.go @@ -6165,7 +6165,7 @@ func makeViewTypesWantJSONs() string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 3, "INLINED": "31C3A9" @@ -6187,7 +6187,7 @@ func makeViewTypesWantJSONs() string { "INLINED": "35" } ], - "VARIADIC_BUFFERS": [""] + "VARIADIC_DATA_BUFFERS": [""] }, { "name": "string_view", @@ -6199,7 +6199,7 @@ func makeViewTypesWantJSONs() string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 3, "INLINED": "1é" @@ -6221,7 +6221,7 @@ func makeViewTypesWantJSONs() 
string { "INLINED": "5" } ], - "VARIADIC_BUFFERS": [""] + "VARIADIC_DATA_BUFFERS": [""] } ] }, @@ -6238,7 +6238,7 @@ func makeViewTypesWantJSONs() string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 3, "INLINED": "31C3A9" @@ -6260,7 +6260,7 @@ func makeViewTypesWantJSONs() string { "INLINED": "35353535" } ], - "VARIADIC_BUFFERS": [""] + "VARIADIC_DATA_BUFFERS": [""] }, { "name": "string_view", @@ -6272,20 +6272,20 @@ func makeViewTypesWantJSONs() string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 3, "INLINED": "1é" }, { "SIZE": 14, - "PREFIX": "32323232", + "PREFIX_HEX": "32323232", "BUFFER_INDEX": 0, "OFFSET": 0 }, { "SIZE": 14, - "PREFIX": "33333333", + "PREFIX_HEX": "33333333", "BUFFER_INDEX": 0, "OFFSET": 14 }, @@ -6298,7 +6298,7 @@ func makeViewTypesWantJSONs() string { "INLINED": "5555" } ], - "VARIADIC_BUFFERS": [ + "VARIADIC_DATA_BUFFERS": [ "32323232323232323232323232323333333333333333333333333333" ] } @@ -6317,20 +6317,20 @@ func makeViewTypesWantJSONs() string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 6, "INLINED": "31C3A931C3A9" }, { "SIZE": 14, - "PREFIX": "32323232", + "PREFIX_HEX": "32323232", "BUFFER_INDEX": 0, "OFFSET": 0 }, { "SIZE": 14, - "PREFIX": "33333333", + "PREFIX_HEX": "33333333", "BUFFER_INDEX": 0, "OFFSET": 14 }, @@ -6343,7 +6343,7 @@ func makeViewTypesWantJSONs() string { "INLINED": "3535" } ], - "VARIADIC_BUFFERS": [ + "VARIADIC_DATA_BUFFERS": [ "32323232323232323232323232323333333333333333333333333333" ] }, @@ -6357,7 +6357,7 @@ func makeViewTypesWantJSONs() string { 1, 1 ], - "DATA": [ + "VIEWS": [ { "SIZE": 6, "INLINED": "1é1é" @@ -6379,7 +6379,7 @@ func makeViewTypesWantJSONs() string { "INLINED": "55" } ], - "VARIADIC_BUFFERS": [""] + "VARIADIC_DATA_BUFFERS": [""] } ] } diff --git a/go/arrow/internal/testing/tools/bits.go b/go/arrow/internal/testing/tools/bits.go index c123573e2fa13..ea6a5432e5c91 100644 --- a/go/arrow/internal/testing/tools/bits.go +++ b/go/arrow/internal/testing/tools/bits.go @@ -22,7 +22,7 @@ import "math/bits" // The low bit of each nibble is tested, therefore integers should be written as 8-digit // hex numbers consisting of 1s or 0s. // -// IntsToBitsLSB(0x11001010) -> 0x35 +// IntsToBitsLSB(0x11001010) -> 0x35 func IntsToBitsLSB(v ...int32) []byte { res := make([]byte, 0, len(v)) for _, b := range v { diff --git a/go/arrow/internal/utils.go b/go/arrow/internal/utils.go index 619eebd97dc78..7b5df167ea432 100644 --- a/go/arrow/internal/utils.go +++ b/go/arrow/internal/utils.go @@ -45,3 +45,15 @@ func HasValidityBitmap(id arrow.Type, version flatbuf.MetadataVersion) bool { } return true } + +// HasBufferSizesBuffer returns whether a given type has an extra buffer +// in the C ABI to store the sizes of other buffers. Currently this is only +// StringView and BinaryView. 
+func HasBufferSizesBuffer(id arrow.Type) bool { + switch id { + case arrow.STRING_VIEW, arrow.BINARY_VIEW: + return true + default: + return false + } +} diff --git a/go/arrow/ipc/file_reader.go b/go/arrow/ipc/file_reader.go index dd51a761510d8..7bc7f6ebfaa09 100644 --- a/go/arrow/ipc/file_reader.go +++ b/go/arrow/ipc/file_reader.go @@ -521,7 +521,7 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) arrow.ArrayData { case *arrow.RunEndEncodedType: field, buffers := ctx.loadCommon(dt.ID(), 1) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) runEnds := ctx.loadChild(dt.RunEnds()) defer runEnds.Release() @@ -583,7 +583,7 @@ func (ctx *arrayLoaderContext) loadPrimitive(dt arrow.DataType) arrow.ArrayData buffers = append(buffers, ctx.buffer()) } - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) return array.NewData(dt, int(field.Length()), buffers, nil, int(field.NullCount()), 0) } @@ -591,7 +591,7 @@ func (ctx *arrayLoaderContext) loadPrimitive(dt arrow.DataType) arrow.ArrayData func (ctx *arrayLoaderContext) loadBinary(dt arrow.DataType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 3) buffers = append(buffers, ctx.buffer(), ctx.buffer()) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) return array.NewData(dt, int(field.Length()), buffers, nil, int(field.NullCount()), 0) } @@ -603,7 +603,7 @@ func (ctx *arrayLoaderContext) loadBinaryView(dt arrow.DataType) arrow.ArrayData for i := 0; i < int(nVariadicBufs); i++ { buffers = append(buffers, ctx.buffer()) } - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) return array.NewData(dt, int(field.Length()), buffers, nil, int(field.NullCount()), 0) } @@ -611,7 +611,7 @@ func (ctx *arrayLoaderContext) loadBinaryView(dt arrow.DataType) arrow.ArrayData func (ctx *arrayLoaderContext) loadFixedSizeBinary(dt *arrow.FixedSizeBinaryType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 2) buffers = append(buffers, ctx.buffer()) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) return array.NewData(dt, int(field.Length()), buffers, nil, int(field.NullCount()), 0) } @@ -619,7 +619,7 @@ func (ctx *arrayLoaderContext) loadFixedSizeBinary(dt *arrow.FixedSizeBinaryType func (ctx *arrayLoaderContext) loadMap(dt *arrow.MapType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 2) buffers = append(buffers, ctx.buffer()) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) sub := ctx.loadChild(dt.Elem()) defer sub.Release() @@ -630,7 +630,7 @@ func (ctx *arrayLoaderContext) loadMap(dt *arrow.MapType) arrow.ArrayData { func (ctx *arrayLoaderContext) loadList(dt arrow.ListLikeType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 2) buffers = append(buffers, ctx.buffer()) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) sub := ctx.loadChild(dt.Elem()) defer sub.Release() @@ -641,7 +641,7 @@ func (ctx *arrayLoaderContext) loadList(dt arrow.ListLikeType) arrow.ArrayData { func (ctx *arrayLoaderContext) loadListView(dt arrow.VarLenListLikeType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 3) buffers = append(buffers, ctx.buffer(), ctx.buffer()) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) sub := ctx.loadChild(dt.Elem()) defer sub.Release() @@ -651,7 +651,7 @@ func (ctx *arrayLoaderContext) loadListView(dt arrow.VarLenListLikeType) arrow.A func (ctx *arrayLoaderContext) loadFixedSizeList(dt *arrow.FixedSizeListType) arrow.ArrayData 
{ field, buffers := ctx.loadCommon(dt.ID(), 1) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) sub := ctx.loadChild(dt.Elem()) defer sub.Release() @@ -661,7 +661,7 @@ func (ctx *arrayLoaderContext) loadFixedSizeList(dt *arrow.FixedSizeListType) ar func (ctx *arrayLoaderContext) loadStruct(dt *arrow.StructType) arrow.ArrayData { field, buffers := ctx.loadCommon(dt.ID(), 1) - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) subs := make([]arrow.ArrayData, dt.NumFields()) for i, f := range dt.Fields() { @@ -704,7 +704,7 @@ func (ctx *arrayLoaderContext) loadUnion(dt arrow.UnionType) arrow.ArrayData { } } - defer releaseBuffers(buffers) + defer memory.ReleaseBuffers(buffers) subs := make([]arrow.ArrayData, dt.NumFields()) for i, f := range dt.Fields() { subs[i] = ctx.loadChild(f.Type) @@ -768,11 +768,3 @@ func readDictionary(memo *dictutils.Memo, meta *memory.Buffer, body ReadAtSeeker } return dictutils.KindReplacement, nil } - -func releaseBuffers(buffers []*memory.Buffer) { - for _, b := range buffers { - if b != nil { - b.Release() - } - } -} diff --git a/go/arrow/ipc/writer.go b/go/arrow/ipc/writer.go index e9d59f0e35e00..31ce53a0f1af7 100644 --- a/go/arrow/ipc/writer.go +++ b/go/arrow/ipc/writer.go @@ -34,6 +34,7 @@ import ( "github.com/apache/arrow/go/v15/arrow/internal/dictutils" "github.com/apache/arrow/go/v15/arrow/internal/flatbuf" "github.com/apache/arrow/go/v15/arrow/memory" + "github.com/apache/arrow/go/v15/internal/utils" ) type swriter struct { @@ -746,42 +747,22 @@ func (w *recordEncoder) visit(p *Payload, arr arrow.Array) error { w.depth++ case *arrow.ListViewType, *arrow.LargeListViewType: - data := arr.Data() arr := arr.(array.VarLenListLike) - offsetTraits := arr.DataType().(arrow.OffsetsDataType).OffsetTypeTraits() - rngOff, rngLen := array.RangeOfValuesUsed(arr) - voffsets := w.getValueOffsetsAtBaseValue(arr, rngOff) - p.body = append(p.body, voffsets) - vsizes := data.Buffers()[2] - if vsizes != nil { - if data.Offset() != 0 || vsizes.Len() > offsetTraits.BytesRequired(arr.Len()) { - beg := offsetTraits.BytesRequired(data.Offset()) - end := beg + offsetTraits.BytesRequired(data.Len()) - vsizes = memory.NewBufferBytes(vsizes.Bytes()[beg:end]) - } else { - vsizes.Retain() - } - } + voffsets, minOffset, maxEnd := w.getZeroBasedListViewOffsets(arr) + vsizes := w.getListViewSizes(arr) + + p.body = append(p.body, voffsets) p.body = append(p.body, vsizes) w.depth-- var ( - values = arr.ListValues() - mustRelease = false - values_offset = int64(rngOff) - values_end = int64(rngOff + rngLen) + values = arr.ListValues() ) - defer func() { - if mustRelease { - values.Release() - } - }() - if arr.Len() > 0 && values_end < int64(values.Len()) { - // must also slice the values - values = array.NewSlice(values, values_offset, values_end) - mustRelease = true + if minOffset != 0 || maxEnd < int64(values.Len()) { + values = array.NewSlice(values, minOffset, maxEnd) + defer values.Release() } err := w.visit(p, values) @@ -882,61 +863,92 @@ func (w *recordEncoder) getZeroBasedValueOffsets(arr arrow.Array) *memory.Buffer return voffsets } -// Truncates the offsets if needed and shifts the values if minOffset > 0. -// The offsets returned are corrected assuming the child values are truncated -// and now start at minOffset. -// -// This function only works on offset buffers of ListViews and LargeListViews. -// TODO(felipecrv): Unify this with getZeroBasedValueOffsets. 
-func (w *recordEncoder) getValueOffsetsAtBaseValue(arr arrow.Array, minOffset int) *memory.Buffer { - data := arr.Data() - voffsets := data.Buffers()[1] - offsetTraits := arr.DataType().(arrow.OffsetsDataType).OffsetTypeTraits() - offsetBytesNeeded := offsetTraits.BytesRequired(data.Len()) +func getZeroBasedListViewOffsets[OffsetT int32 | int64](mem memory.Allocator, arr array.VarLenListLike) (valueOffsets *memory.Buffer, minOffset, maxEnd OffsetT) { + requiredBytes := int(unsafe.Sizeof(minOffset)) * arr.Len() + if arr.Data().Offset() == 0 { + // slice offsets to used extent, in case we have truncated slice + minOffset, maxEnd = 0, OffsetT(arr.ListValues().Len()) + valueOffsets = arr.Data().Buffers()[1] + if valueOffsets.Len() > requiredBytes { + valueOffsets = memory.SliceBuffer(valueOffsets, 0, requiredBytes) + } else { + valueOffsets.Retain() + } + return + } - if voffsets == nil || voffsets.Len() == 0 { - return nil + // non-zero offset, it's likely that the smallest offset is not zero + // we must a) create a new offsets array with shifted offsets and + // b) slice the values array accordingly + + valueOffsets = memory.NewResizableBuffer(mem) + valueOffsets.Resize(requiredBytes) + if arr.Len() > 0 { + // max value of int32/int64 based on type + minOffset = (^OffsetT(0)) << ((8 * unsafe.Sizeof(minOffset)) - 1) + for i := 0; i < arr.Len(); i++ { + start, end := arr.ValueOffsets(i) + minOffset = utils.Min(minOffset, OffsetT(start)) + maxEnd = utils.Max(maxEnd, OffsetT(end)) + } + } + + offsets := arrow.GetData[OffsetT](arr.Data().Buffers()[1].Bytes())[arr.Data().Offset():] + destOffset := arrow.GetData[OffsetT](valueOffsets.Bytes()) + for i := 0; i < arr.Len(); i++ { + destOffset[i] = offsets[i] - minOffset } + return +} - needsTruncate := data.Offset() != 0 || offsetBytesNeeded < voffsets.Len() - needsShift := minOffset > 0 +func getListViewSizes[OffsetT int32 | int64](arr array.VarLenListLike) *memory.Buffer { + var z OffsetT + requiredBytes := int(unsafe.Sizeof(z)) * arr.Len() + sizes := arr.Data().Buffers()[2] - if needsTruncate || needsShift { - shiftedOffsets := memory.NewResizableBuffer(w.mem) - shiftedOffsets.Resize(offsetBytesNeeded) + if arr.Data().Offset() != 0 || sizes.Len() > requiredBytes { + // slice offsets to used extent, in case we have truncated slice + offsetBytes := arr.Data().Offset() * int(unsafe.Sizeof(z)) + sizes = memory.SliceBuffer(sizes, offsetBytes, requiredBytes) + } else { + sizes.Retain() + } + return sizes +} - switch arr.DataType().Layout().Buffers[1].ByteWidth { - case 8: - dest := arrow.Int64Traits.CastFromBytes(shiftedOffsets.Bytes()) - offsets := arrow.Int64Traits.CastFromBytes(voffsets.Bytes())[data.Offset() : data.Offset()+data.Len()] +func (w *recordEncoder) getZeroBasedListViewOffsets(arr array.VarLenListLike) (*memory.Buffer, int64, int64) { + if arr.Len() == 0 { + return nil, 0, 0 + } - if minOffset > 0 { - for i, o := range offsets { - dest[i] = o - int64(minOffset) - } - } else { - copy(dest, offsets) - } - default: - debug.Assert(arr.DataType().Layout().Buffers[1].ByteWidth == 4, "invalid offset bytewidth") - dest := arrow.Int32Traits.CastFromBytes(shiftedOffsets.Bytes()) - offsets := arrow.Int32Traits.CastFromBytes(voffsets.Bytes())[data.Offset() : data.Offset()+data.Len()] + var ( + outOffsets *memory.Buffer + minOff, maxEnd int64 + ) - if minOffset > 0 { - for i, o := range offsets { - dest[i] = o - int32(minOffset) - } - } else { - copy(dest, offsets) - } - } + switch v := arr.(type) { + case *array.ListView: + voffsets, outOff, outEnd 
:= getZeroBasedListViewOffsets[int32](w.mem, v) + outOffsets = voffsets + minOff, maxEnd = int64(outOff), int64(outEnd) + case *array.LargeListView: + outOffsets, minOff, maxEnd = getZeroBasedListViewOffsets[int64](w.mem, v) + } + return outOffsets, minOff, maxEnd +} - voffsets = shiftedOffsets - } else { - voffsets.Retain() +func (w *recordEncoder) getListViewSizes(arr array.VarLenListLike) *memory.Buffer { + if arr.Len() == 0 { + return nil } - return voffsets + switch v := arr.(type) { + case *array.ListView: + return getListViewSizes[int32](v) + case *array.LargeListView: + return getListViewSizes[int64](v) + } + return nil } func (w *recordEncoder) rebaseDenseUnionValueOffsets(arr *array.DenseUnion, offsets, lengths []int32) *memory.Buffer { diff --git a/go/arrow/memory/util.go b/go/arrow/memory/util.go index 3b0d3a5cb9ef1..6cc7ec91b9638 100644 --- a/go/arrow/memory/util.go +++ b/go/arrow/memory/util.go @@ -35,3 +35,11 @@ func isMultipleOfPowerOf2(v int, d int) bool { func addressOf(b []byte) uintptr { return uintptr(unsafe.Pointer(&b[0])) } + +func ReleaseBuffers(buffers []*Buffer) { + for _, b := range buffers { + if b != nil { + b.Release() + } + } +} diff --git a/go/arrow/type_traits.go b/go/arrow/type_traits.go new file mode 100644 index 0000000000000..67fa8a266b35f --- /dev/null +++ b/go/arrow/type_traits.go @@ -0,0 +1,162 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package arrow + +import ( + "reflect" + "unsafe" + + "github.com/apache/arrow/go/v15/arrow/decimal128" + "github.com/apache/arrow/go/v15/arrow/decimal256" + "github.com/apache/arrow/go/v15/arrow/float16" + "golang.org/x/exp/constraints" +) + +// IntType is a type constraint for raw values represented as signed +// integer types by We aren't just using constraints.Signed +// because we don't want to include the raw `int` type here whose size +// changes based on the architecture (int32 on 32-bit architectures and +// int64 on 64-bit architectures). +// +// This will also cover types like MonthInterval or the time types +// as their underlying types are int32 and int64 which will get covered +// by using the ~ +type IntType interface { + ~int8 | ~int16 | ~int32 | ~int64 +} + +// UintType is a type constraint for raw values represented as unsigned +// integer types by We aren't just using constraints.Unsigned +// because we don't want to include the raw `uint` type here whose size +// changes based on the architecture (uint32 on 32-bit architectures and +// uint64 on 64-bit architectures). 
We also don't want to include uintptr +type UintType interface { + ~uint8 | ~uint16 | ~uint32 | ~uint64 +} + +// FloatType is a type constraint for raw values for representing +// floating point values in This consists of constraints.Float and +// float16.Num +type FloatType interface { + float16.Num | constraints.Float +} + +// NumericType is a type constraint for just signed/unsigned integers +// and float32/float64. +type NumericType interface { + IntType | UintType | constraints.Float +} + +// FixedWidthType is a type constraint for raw values in Arrow that +// can be represented as FixedWidth byte slices. Specifically this is for +// using Go generics to easily re-type a byte slice to a properly-typed +// slice. Booleans are excluded here since they are represented by Arrow +// as a bitmap and thus the buffer can't be just reinterpreted as a []bool +type FixedWidthType interface { + IntType | UintType | + FloatType | decimal128.Num | decimal256.Num | + DayTimeInterval | MonthDayNanoInterval +} + +type TemporalType interface { + Date32 | Date64 | Time32 | Time64 | + Timestamp | Duration | DayTimeInterval | + MonthInterval | MonthDayNanoInterval +} + +func reinterpretSlice[Out, T any](b []T) []Out { + if cap(b) == 0 { + return nil + } + out := (*Out)(unsafe.Pointer(&b[:1][0])) + + lenBytes := len(b) * int(unsafe.Sizeof(b[0])) + capBytes := cap(b) * int(unsafe.Sizeof(b[0])) + + lenOut := lenBytes / int(unsafe.Sizeof(*out)) + capOut := capBytes / int(unsafe.Sizeof(*out)) + + return unsafe.Slice(out, capOut)[:lenOut] +} + +// GetValues reinterprets the data.Buffers()[i] to a slice of T with len=data.Len(). +// +// If the buffer is nil, nil will be returned. +// +// NOTE: the buffer's length must be a multiple of Sizeof(T). +func GetValues[T FixedWidthType](data ArrayData, i int) []T { + if data.Buffers()[i] == nil || data.Buffers()[i].Len() == 0 { + return nil + } + return reinterpretSlice[T](data.Buffers()[i].Bytes())[data.Offset() : data.Offset()+data.Len()] +} + +// GetOffsets reinterprets the data.Buffers()[i] to a slice of T with len=data.Len()+1. +// +// NOTE: the buffer's length must be a multiple of Sizeof(T). +func GetOffsets[T int32 | int64](data ArrayData, i int) []T { + return reinterpretSlice[T](data.Buffers()[i].Bytes())[data.Offset() : data.Offset()+data.Len()+1] +} + +// GetBytes reinterprets a slice of T to a slice of bytes. +func GetBytes[T FixedWidthType | ViewHeader](in []T) []byte { + return reinterpretSlice[byte](in) +} + +// GetData reinterprets a slice of bytes to a slice of T. +// +// NOTE: the buffer's length must be a multiple of Sizeof(T). 
+func GetData[T FixedWidthType | ViewHeader](in []byte) []T { + return reinterpretSlice[T](in) +} + +var typMap = map[reflect.Type]DataType{ + reflect.TypeOf(false): FixedWidthTypes.Boolean, + reflect.TypeOf(int8(0)): PrimitiveTypes.Int8, + reflect.TypeOf(int16(0)): PrimitiveTypes.Int16, + reflect.TypeOf(int32(0)): PrimitiveTypes.Int32, + reflect.TypeOf(int64(0)): PrimitiveTypes.Int64, + reflect.TypeOf(uint8(0)): PrimitiveTypes.Uint8, + reflect.TypeOf(uint16(0)): PrimitiveTypes.Uint16, + reflect.TypeOf(uint32(0)): PrimitiveTypes.Uint32, + reflect.TypeOf(uint64(0)): PrimitiveTypes.Uint64, + reflect.TypeOf(float32(0)): PrimitiveTypes.Float32, + reflect.TypeOf(float64(0)): PrimitiveTypes.Float64, + reflect.TypeOf(string("")): BinaryTypes.String, + reflect.TypeOf(Date32(0)): FixedWidthTypes.Date32, + reflect.TypeOf(Date64(0)): FixedWidthTypes.Date64, + reflect.TypeOf(true): FixedWidthTypes.Boolean, + reflect.TypeOf(float16.Num{}): FixedWidthTypes.Float16, + reflect.TypeOf([]byte{}): BinaryTypes.Binary, +} + +// GetDataType returns the appropriate DataType for the given type T +// only for non-parametric types. This uses a map and reflection internally +// so don't call this in a tight loop, instead call this once and then use +// a closure with the result. +func GetDataType[T NumericType | bool | string | []byte | float16.Num]() DataType { + var z T + return typMap[reflect.TypeOf(z)] +} + +// GetType returns the appropriate Type type T, only for non-parametric +// types. This uses a map and reflection internally so don't call this in +// a tight loop, instead call it once and then use a closure with the result. +func GetType[T NumericType | bool | string]() Type { + var z T + return typMap[reflect.TypeOf(z)].ID() +} diff --git a/go/arrow/type_traits_decimal128.go b/go/arrow/type_traits_decimal128.go index f573ad3c65a4c..d600ba29c1186 100644 --- a/go/arrow/type_traits_decimal128.go +++ b/go/arrow/type_traits_decimal128.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/decimal128" @@ -47,16 +46,12 @@ func (decimal128Traits) PutValue(b []byte, v decimal128.Num) { // // NOTE: len(b) must be a multiple of Uint16SizeBytes. func (decimal128Traits) CastFromBytes(b []byte) []decimal128.Num { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*decimal128.Num)(unsafe.Pointer(h.Data)), cap(b)/Decimal128SizeBytes)[:len(b)/Decimal128SizeBytes] + return GetData[decimal128.Num](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (decimal128Traits) CastToBytes(b []decimal128.Num) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Decimal128SizeBytes)[:len(b)*Decimal128SizeBytes] + return GetBytes(b) } // Copy copies src to dst. 
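
The trait changes in this patch (decimal128 above, and decimal256, float16, interval, numeric, and timestamp below) all follow the same pattern: the hand-rolled `reflect.SliceHeader` casts are replaced by one-line calls to the new generic `GetData`/`GetBytes` helpers defined in `type_traits.go` above. As a rough illustration only — a minimal sketch assuming the `arrow` package from this patch at the v15 module path used throughout — the helpers can be used directly like this:

```go
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v15/arrow"
)

func main() {
	// Reinterpret a typed slice as raw bytes and back without copying,
	// which is what the per-type CastToBytes/CastFromBytes trait methods
	// now delegate to.
	vals := []int32{1, 2, 3, 4}
	raw := arrow.GetBytes(vals)       // []int32 -> []byte
	back := arrow.GetData[int32](raw) // []byte -> []int32

	// GetDataType maps a Go type parameter to its Arrow DataType.
	fmt.Println(len(raw), back, arrow.GetDataType[int32]()) // 16 [1 2 3 4] int32
}
```

Besides removing the duplicated unsafe code from every traits file, this also drops their use of `reflect.SliceHeader`, which has been deprecated since Go 1.20.
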
diff --git a/go/arrow/type_traits_decimal256.go b/go/arrow/type_traits_decimal256.go index adf3cc3e0bc31..fded46a0a52d0 100644 --- a/go/arrow/type_traits_decimal256.go +++ b/go/arrow/type_traits_decimal256.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/decimal256" @@ -44,15 +43,11 @@ func (decimal256Traits) PutValue(b []byte, v decimal256.Num) { // CastFromBytes reinterprets the slice b to a slice of decimal256 func (decimal256Traits) CastFromBytes(b []byte) []decimal256.Num { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*decimal256.Num)(unsafe.Pointer(h.Data)), cap(b)/Decimal256SizeBytes)[:len(b)/Decimal256SizeBytes] + return GetData[decimal256.Num](b) } func (decimal256Traits) CastToBytes(b []decimal256.Num) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Decimal256SizeBytes)[:len(b)*Decimal256SizeBytes] + return GetBytes(b) } func (decimal256Traits) Copy(dst, src []decimal256.Num) { copy(dst, src) } diff --git a/go/arrow/type_traits_float16.go b/go/arrow/type_traits_float16.go index e59efd4c248d8..5369ad352f839 100644 --- a/go/arrow/type_traits_float16.go +++ b/go/arrow/type_traits_float16.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -46,16 +45,12 @@ func (float16Traits) PutValue(b []byte, v float16.Num) { // // NOTE: len(b) must be a multiple of Uint16SizeBytes. func (float16Traits) CastFromBytes(b []byte) []float16.Num { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*float16.Num)(unsafe.Pointer(h.Data)), cap(b)/Float16SizeBytes)[:len(b)/Float16SizeBytes] + return GetData[float16.Num](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (float16Traits) CastToBytes(b []float16.Num) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Float16SizeBytes)[:len(b)*Float16SizeBytes] + return GetBytes(b) } // Copy copies src to dst. diff --git a/go/arrow/type_traits_interval.go b/go/arrow/type_traits_interval.go index 5fbd7a5248918..ca530a72323ff 100644 --- a/go/arrow/type_traits_interval.go +++ b/go/arrow/type_traits_interval.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -57,16 +56,12 @@ func (monthTraits) PutValue(b []byte, v MonthInterval) { // // NOTE: len(b) must be a multiple of MonthIntervalSizeBytes. func (monthTraits) CastFromBytes(b []byte) []MonthInterval { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*MonthInterval)(unsafe.Pointer(h.Data)), cap(b)/MonthIntervalSizeBytes)[:len(b)/MonthIntervalSizeBytes] + return GetData[MonthInterval](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (monthTraits) CastToBytes(b []MonthInterval) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*MonthIntervalSizeBytes)[:len(b)*MonthIntervalSizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -94,16 +89,12 @@ func (daytimeTraits) PutValue(b []byte, v DayTimeInterval) { // // NOTE: len(b) must be a multiple of DayTimeIntervalSizeBytes. 
func (daytimeTraits) CastFromBytes(b []byte) []DayTimeInterval { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*DayTimeInterval)(unsafe.Pointer(h.Data)), cap(b)/DayTimeIntervalSizeBytes)[:len(b)/DayTimeIntervalSizeBytes] + return GetData[DayTimeInterval](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (daytimeTraits) CastToBytes(b []DayTimeInterval) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*DayTimeIntervalSizeBytes)[:len(b)*DayTimeIntervalSizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -132,16 +123,12 @@ func (monthDayNanoTraits) PutValue(b []byte, v MonthDayNanoInterval) { // // NOTE: len(b) must be a multiple of MonthDayNanoIntervalSizeBytes. func (monthDayNanoTraits) CastFromBytes(b []byte) []MonthDayNanoInterval { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*MonthDayNanoInterval)(unsafe.Pointer(h.Data)), cap(b)/MonthDayNanoIntervalSizeBytes)[:len(b)/MonthDayNanoIntervalSizeBytes] + return GetData[MonthDayNanoInterval](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (monthDayNanoTraits) CastToBytes(b []MonthDayNanoInterval) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*MonthDayNanoIntervalSizeBytes)[:len(b)*MonthDayNanoIntervalSizeBytes] + return GetBytes(b) } // Copy copies src to dst. diff --git a/go/arrow/type_traits_numeric.gen.go b/go/arrow/type_traits_numeric.gen.go index 57606c0fce6df..06412466032f9 100644 --- a/go/arrow/type_traits_numeric.gen.go +++ b/go/arrow/type_traits_numeric.gen.go @@ -20,7 +20,6 @@ package arrow import ( "math" - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -65,16 +64,12 @@ func (int64Traits) PutValue(b []byte, v int64) { // // NOTE: len(b) must be a multiple of Int64SizeBytes. func (int64Traits) CastFromBytes(b []byte) []int64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*int64)(unsafe.Pointer(h.Data)), cap(b)/Int64SizeBytes)[:len(b)/Int64SizeBytes] + return GetData[int64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (int64Traits) CastToBytes(b []int64) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Int64SizeBytes)[:len(b)*Int64SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -101,16 +96,12 @@ func (uint64Traits) PutValue(b []byte, v uint64) { // // NOTE: len(b) must be a multiple of Uint64SizeBytes. func (uint64Traits) CastFromBytes(b []byte) []uint64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*uint64)(unsafe.Pointer(h.Data)), cap(b)/Uint64SizeBytes)[:len(b)/Uint64SizeBytes] + return GetData[uint64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (uint64Traits) CastToBytes(b []uint64) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Uint64SizeBytes)[:len(b)*Uint64SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -137,16 +128,12 @@ func (float64Traits) PutValue(b []byte, v float64) { // // NOTE: len(b) must be a multiple of Float64SizeBytes. 
func (float64Traits) CastFromBytes(b []byte) []float64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*float64)(unsafe.Pointer(h.Data)), cap(b)/Float64SizeBytes)[:len(b)/Float64SizeBytes] + return GetData[float64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (float64Traits) CastToBytes(b []float64) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Float64SizeBytes)[:len(b)*Float64SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -173,16 +160,12 @@ func (int32Traits) PutValue(b []byte, v int32) { // // NOTE: len(b) must be a multiple of Int32SizeBytes. func (int32Traits) CastFromBytes(b []byte) []int32 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*int32)(unsafe.Pointer(h.Data)), cap(b)/Int32SizeBytes)[:len(b)/Int32SizeBytes] + return GetData[int32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (int32Traits) CastToBytes(b []int32) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Int32SizeBytes)[:len(b)*Int32SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -209,16 +192,12 @@ func (uint32Traits) PutValue(b []byte, v uint32) { // // NOTE: len(b) must be a multiple of Uint32SizeBytes. func (uint32Traits) CastFromBytes(b []byte) []uint32 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*uint32)(unsafe.Pointer(h.Data)), cap(b)/Uint32SizeBytes)[:len(b)/Uint32SizeBytes] + return GetData[uint32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (uint32Traits) CastToBytes(b []uint32) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Uint32SizeBytes)[:len(b)*Uint32SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -245,16 +224,12 @@ func (float32Traits) PutValue(b []byte, v float32) { // // NOTE: len(b) must be a multiple of Float32SizeBytes. func (float32Traits) CastFromBytes(b []byte) []float32 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*float32)(unsafe.Pointer(h.Data)), cap(b)/Float32SizeBytes)[:len(b)/Float32SizeBytes] + return GetData[float32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (float32Traits) CastToBytes(b []float32) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Float32SizeBytes)[:len(b)*Float32SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -281,16 +256,12 @@ func (int16Traits) PutValue(b []byte, v int16) { // // NOTE: len(b) must be a multiple of Int16SizeBytes. func (int16Traits) CastFromBytes(b []byte) []int16 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*int16)(unsafe.Pointer(h.Data)), cap(b)/Int16SizeBytes)[:len(b)/Int16SizeBytes] + return GetData[int16](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (int16Traits) CastToBytes(b []int16) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Int16SizeBytes)[:len(b)*Int16SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -317,16 +288,12 @@ func (uint16Traits) PutValue(b []byte, v uint16) { // // NOTE: len(b) must be a multiple of Uint16SizeBytes. 
func (uint16Traits) CastFromBytes(b []byte) []uint16 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*uint16)(unsafe.Pointer(h.Data)), cap(b)/Uint16SizeBytes)[:len(b)/Uint16SizeBytes] + return GetData[uint16](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (uint16Traits) CastToBytes(b []uint16) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Uint16SizeBytes)[:len(b)*Uint16SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -353,16 +320,12 @@ func (int8Traits) PutValue(b []byte, v int8) { // // NOTE: len(b) must be a multiple of Int8SizeBytes. func (int8Traits) CastFromBytes(b []byte) []int8 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*int8)(unsafe.Pointer(h.Data)), cap(b)/Int8SizeBytes)[:len(b)/Int8SizeBytes] + return GetData[int8](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (int8Traits) CastToBytes(b []int8) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Int8SizeBytes)[:len(b)*Int8SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -389,16 +352,12 @@ func (uint8Traits) PutValue(b []byte, v uint8) { // // NOTE: len(b) must be a multiple of Uint8SizeBytes. func (uint8Traits) CastFromBytes(b []byte) []uint8 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*uint8)(unsafe.Pointer(h.Data)), cap(b)/Uint8SizeBytes)[:len(b)/Uint8SizeBytes] + return GetData[uint8](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (uint8Traits) CastToBytes(b []uint8) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Uint8SizeBytes)[:len(b)*Uint8SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -425,16 +384,12 @@ func (time32Traits) PutValue(b []byte, v Time32) { // // NOTE: len(b) must be a multiple of Time32SizeBytes. func (time32Traits) CastFromBytes(b []byte) []Time32 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Time32)(unsafe.Pointer(h.Data)), cap(b)/Time32SizeBytes)[:len(b)/Time32SizeBytes] + return GetData[Time32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (time32Traits) CastToBytes(b []Time32) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Time32SizeBytes)[:len(b)*Time32SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -461,16 +416,12 @@ func (time64Traits) PutValue(b []byte, v Time64) { // // NOTE: len(b) must be a multiple of Time64SizeBytes. func (time64Traits) CastFromBytes(b []byte) []Time64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Time64)(unsafe.Pointer(h.Data)), cap(b)/Time64SizeBytes)[:len(b)/Time64SizeBytes] + return GetData[Time64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (time64Traits) CastToBytes(b []Time64) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Time64SizeBytes)[:len(b)*Time64SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -497,16 +448,12 @@ func (date32Traits) PutValue(b []byte, v Date32) { // // NOTE: len(b) must be a multiple of Date32SizeBytes. 
func (date32Traits) CastFromBytes(b []byte) []Date32 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Date32)(unsafe.Pointer(h.Data)), cap(b)/Date32SizeBytes)[:len(b)/Date32SizeBytes] + return GetData[Date32](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (date32Traits) CastToBytes(b []Date32) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Date32SizeBytes)[:len(b)*Date32SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -533,16 +480,12 @@ func (date64Traits) PutValue(b []byte, v Date64) { // // NOTE: len(b) must be a multiple of Date64SizeBytes. func (date64Traits) CastFromBytes(b []byte) []Date64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Date64)(unsafe.Pointer(h.Data)), cap(b)/Date64SizeBytes)[:len(b)/Date64SizeBytes] + return GetData[Date64](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (date64Traits) CastToBytes(b []Date64) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*Date64SizeBytes)[:len(b)*Date64SizeBytes] + return GetBytes(b) } // Copy copies src to dst. @@ -569,16 +512,12 @@ func (durationTraits) PutValue(b []byte, v Duration) { // // NOTE: len(b) must be a multiple of DurationSizeBytes. func (durationTraits) CastFromBytes(b []byte) []Duration { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Duration)(unsafe.Pointer(h.Data)), cap(b)/DurationSizeBytes)[:len(b)/DurationSizeBytes] + return GetData[Duration](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (durationTraits) CastToBytes(b []Duration) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*DurationSizeBytes)[:len(b)*DurationSizeBytes] + return GetBytes(b) } // Copy copies src to dst. diff --git a/go/arrow/type_traits_numeric.gen.go.tmpl b/go/arrow/type_traits_numeric.gen.go.tmpl index c491047b51429..e98f59528c6aa 100644 --- a/go/arrow/type_traits_numeric.gen.go.tmpl +++ b/go/arrow/type_traits_numeric.gen.go.tmpl @@ -18,7 +18,6 @@ package arrow import ( "math" - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -66,16 +65,12 @@ func ({{.name}}Traits) PutValue(b []byte, v {{.Type}}) { // // NOTE: len(b) must be a multiple of {{.Name}}SizeBytes. func ({{.name}}Traits) CastFromBytes(b []byte) []{{.Type}} { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*{{.Type}})(unsafe.Pointer(h.Data)), cap(b)/{{.Name}}SizeBytes)[:len(b)/{{.Name}}SizeBytes] + return GetData[{{.Type}}](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func ({{.name}}Traits) CastToBytes(b []{{.Type}}) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*{{.Name}}SizeBytes)[:len(b)*{{.Name}}SizeBytes] + return GetBytes(b) } // Copy copies src to dst. diff --git a/go/arrow/type_traits_timestamp.go b/go/arrow/type_traits_timestamp.go index c1a9aba3db386..8e9970a719f54 100644 --- a/go/arrow/type_traits_timestamp.go +++ b/go/arrow/type_traits_timestamp.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -43,16 +42,12 @@ func (timestampTraits) PutValue(b []byte, v Timestamp) { // // NOTE: len(b) must be a multiple of TimestampSizeBytes. 
func (timestampTraits) CastFromBytes(b []byte) []Timestamp { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*Timestamp)(unsafe.Pointer(h.Data)), cap(b)/TimestampSizeBytes)[:len(b)/TimestampSizeBytes] + return GetData[Timestamp](b) } // CastToBytes reinterprets the slice b to a slice of bytes. func (timestampTraits) CastToBytes(b []Timestamp) []byte { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*TimestampSizeBytes)[:len(b)*TimestampSizeBytes] + return GetBytes(b) } // Copy copies src to dst. diff --git a/go/arrow/type_traits_view.go b/go/arrow/type_traits_view.go index c3846db294681..be3f15fed69ae 100644 --- a/go/arrow/type_traits_view.go +++ b/go/arrow/type_traits_view.go @@ -17,7 +17,6 @@ package arrow import ( - "reflect" "unsafe" "github.com/apache/arrow/go/v15/arrow/endian" @@ -39,15 +38,11 @@ func (viewHeaderTraits) PutValue(b []byte, v ViewHeader) { } func (viewHeaderTraits) CastFromBytes(b []byte) (res []ViewHeader) { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*ViewHeader)(unsafe.Pointer(h.Data)), cap(b)/ViewHeaderSizeBytes)[:len(b)/ViewHeaderSizeBytes] + return GetData[ViewHeader](b) } func (viewHeaderTraits) CastToBytes(b []ViewHeader) (res []byte) { - h := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - - return unsafe.Slice((*byte)(unsafe.Pointer(h.Data)), cap(b)*ViewHeaderSizeBytes)[:len(b)*ViewHeaderSizeBytes] + return GetBytes(b) } func (viewHeaderTraits) Copy(dst, src []ViewHeader) { copy(dst, src) } diff --git a/go/internal/bitutils/bit_set_run_reader.go b/go/internal/bitutils/bit_set_run_reader.go index 6764ca7912679..374b8d4aab39a 100644 --- a/go/internal/bitutils/bit_set_run_reader.go +++ b/go/internal/bitutils/bit_set_run_reader.go @@ -113,7 +113,7 @@ func (br *baseSetBitRunReader) Reset(bitmap []byte, startOffset, length int64) { bitOffset := int8(startOffset % 8) if length > 0 && bitOffset != 0 { - br.curNumBits = int32(utils.MinInt(int(length), int(8-bitOffset))) + br.curNumBits = int32(utils.Min(int(length), int(8-bitOffset))) br.curWord = br.loadPartial(bitOffset, int64(br.curNumBits)) } return @@ -124,7 +124,7 @@ func (br *baseSetBitRunReader) Reset(bitmap []byte, startOffset, length int64) { endBitOffset := int8((startOffset + length) % 8) if length > 0 && endBitOffset != 0 { br.pos++ - br.curNumBits = int32(utils.MinInt(int(length), int(endBitOffset))) + br.curNumBits = int32(utils.Min(int(length), int(endBitOffset))) br.curWord = br.loadPartial(8-endBitOffset, int64(br.curNumBits)) } } @@ -219,7 +219,7 @@ func (br *baseSetBitRunReader) skipNextZeros() { if br.remaining > 0 { br.curWord = br.loadPartial(0, br.remaining) br.curNumBits = int32(br.remaining) - nzeros := int32(utils.MinInt(int(br.curNumBits), int(br.countFirstZeros(br.curWord)))) + nzeros := int32(utils.Min(int(br.curNumBits), int(br.countFirstZeros(br.curWord)))) br.curWord = br.consumeBits(br.curWord, nzeros) br.curNumBits -= nzeros br.remaining -= int64(nzeros) diff --git a/go/internal/utils/math.go b/go/internal/utils/math.go index 62cf96ce43156..c8311750e3a4c 100644 --- a/go/internal/utils/math.go +++ b/go/internal/utils/math.go @@ -16,32 +16,16 @@ package utils -// Min is a convenience Min function for int64 -func Min(a, b int64) int64 { - if a < b { - return a - } - return b -} +import "golang.org/x/exp/constraints" -// MinInt is a convenience Min function for int -func MinInt(a, b int) int { +func Min[T constraints.Ordered](a, b T) T { if a < b { return a } return b } 
-// Max is a convenience Max function for int64 -func Max(a, b int64) int64 { - if a > b { - return a - } - return b -} - -// MaxInt is a convenience Max function for int -func MaxInt(a, b int) int { +func Max[T constraints.Ordered](a, b T) T { if a > b { return a } diff --git a/go/parquet/file/column_reader.go b/go/parquet/file/column_reader.go index 766638d88f26c..342fb3b198abe 100644 --- a/go/parquet/file/column_reader.go +++ b/go/parquet/file/column_reader.go @@ -517,7 +517,7 @@ func (c *columnChunkReader) readBatch(batchSize int64, defLvls, repLvls []int16, // if this is a required field, ndefs will be 0 since there is no definition // levels stored with it and `read` will be the number of values, otherwise // we use ndefs since it will be equal to or greater than read. - totalVals := int64(utils.MaxInt(ndefs, read)) + totalVals := int64(utils.Max(ndefs, read)) c.consumeBufferedValues(totalVals) totalLvls += totalVals diff --git a/go/parquet/file/column_reader_test.go b/go/parquet/file/column_reader_test.go index 21ea52e2b7bbc..a6725bc02fee0 100755 --- a/go/parquet/file/column_reader_test.go +++ b/go/parquet/file/column_reader_test.go @@ -244,7 +244,7 @@ func (p *PrimitiveReaderSuite) checkResults(typ reflect.Type) { totalRead += batch batchActual += int(read) - batchSize = int32(utils.MinInt(1<<24, utils.MaxInt(int(batchSize*2), 4096))) + batchSize = int32(utils.Min(1<<24, utils.Max(int(batchSize*2), 4096))) if batch <= 0 { break } diff --git a/go/parquet/file/level_conversion.go b/go/parquet/file/level_conversion.go index f6707fce86d80..251468658ae30 100755 --- a/go/parquet/file/level_conversion.go +++ b/go/parquet/file/level_conversion.go @@ -144,7 +144,7 @@ func defLevelsBatchToBitmap(defLevels []int16, remainingUpperBound int64, info L var batch []int16 for len(defLevels) > 0 { - batchSize := shared_utils.MinInt(maxbatch, len(defLevels)) + batchSize := shared_utils.Min(maxbatch, len(defLevels)) batch, defLevels = defLevels[:batchSize], defLevels[batchSize:] definedBitmap := bmi.GreaterThanBitmap(batch, info.DefLevel-1) diff --git a/go/parquet/internal/encoding/boolean_decoder.go b/go/parquet/internal/encoding/boolean_decoder.go index 3782dc85ea814..353f443855952 100644 --- a/go/parquet/internal/encoding/boolean_decoder.go +++ b/go/parquet/internal/encoding/boolean_decoder.go @@ -55,7 +55,7 @@ func (dec *PlainBooleanDecoder) SetData(nvals int, data []byte) error { // // Returns the number of values decoded func (dec *PlainBooleanDecoder) Decode(out []bool) (int, error) { - max := shared_utils.MinInt(len(out), dec.nvals) + max := shared_utils.Min(len(out), dec.nvals) // attempts to read all remaining bool values from the current data byte unalignedExtract := func(i int) int { @@ -148,7 +148,7 @@ func (dec *RleBooleanDecoder) SetData(nvals int, data []byte) error { } func (dec *RleBooleanDecoder) Decode(out []bool) (int, error) { - max := shared_utils.MinInt(len(out), dec.nvals) + max := shared_utils.Min(len(out), dec.nvals) var ( buf [1024]uint64 @@ -156,7 +156,7 @@ func (dec *RleBooleanDecoder) Decode(out []bool) (int, error) { ) for n > 0 { - batch := shared_utils.MinInt(len(buf), n) + batch := shared_utils.Min(len(buf), n) decoded := dec.rleDec.GetBatch(buf[:batch]) if decoded != batch { return max - n, io.ErrUnexpectedEOF diff --git a/go/parquet/internal/encoding/byte_array_decoder.go b/go/parquet/internal/encoding/byte_array_decoder.go index 82ce9f84265c5..0c1c858fb48bb 100644 --- a/go/parquet/internal/encoding/byte_array_decoder.go +++ 
b/go/parquet/internal/encoding/byte_array_decoder.go @@ -49,7 +49,7 @@ func (PlainByteArrayDecoder) Type() parquet.Type { // // Returns the number of values that were decoded. func (pbad *PlainByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { - max := utils.MinInt(len(out), pbad.nvals) + max := utils.Min(len(out), pbad.nvals) for i := 0; i < max; i++ { // there should always be at least four bytes which is the length of the diff --git a/go/parquet/internal/encoding/decoder.go b/go/parquet/internal/encoding/decoder.go index cee624730e993..acb57fbce7806 100644 --- a/go/parquet/internal/encoding/decoder.go +++ b/go/parquet/internal/encoding/decoder.go @@ -155,7 +155,7 @@ func (d *dictDecoder) decodeSpaced(out interface{}, nullCount int, validBits []b } func (d *dictDecoder) DecodeIndices(numValues int, bldr array.Builder) (int, error) { - n := shared_utils.MinInt(numValues, d.nvals) + n := shared_utils.Min(numValues, d.nvals) if cap(d.idxScratchSpace) < n { d.idxScratchSpace = make([]uint64, n, bitutil.NextPowerOf2(n)) } else { diff --git a/go/parquet/internal/encoding/delta_bit_packing.go b/go/parquet/internal/encoding/delta_bit_packing.go index a00f3457cac7a..560b77f4c66ce 100644 --- a/go/parquet/internal/encoding/delta_bit_packing.go +++ b/go/parquet/internal/encoding/delta_bit_packing.go @@ -158,7 +158,7 @@ func (d *DeltaBitPackInt32Decoder) unpackNextMini() error { // Decode retrieves min(remaining values, len(out)) values from the data and returns the number // of values actually decoded and any errors encountered. func (d *DeltaBitPackInt32Decoder) Decode(out []int32) (int, error) { - max := shared_utils.MinInt(len(out), int(d.totalValues)) + max := shared_utils.Min(len(out), int(d.totalValues)) if max == 0 { return 0, nil } @@ -249,7 +249,7 @@ func (d *DeltaBitPackInt64Decoder) unpackNextMini() error { // Decode retrieves min(remaining values, len(out)) values from the data and returns the number // of values actually decoded and any errors encountered. func (d *DeltaBitPackInt64Decoder) Decode(out []int64) (int, error) { - max := shared_utils.MinInt(len(out), d.nvals) + max := shared_utils.Min(len(out), d.nvals) if max == 0 { return 0, nil } diff --git a/go/parquet/internal/encoding/delta_byte_array.go b/go/parquet/internal/encoding/delta_byte_array.go index 57b0c8a70e5ad..5e5002e34a68f 100644 --- a/go/parquet/internal/encoding/delta_byte_array.go +++ b/go/parquet/internal/encoding/delta_byte_array.go @@ -172,7 +172,7 @@ func (d *DeltaByteArrayDecoder) SetData(nvalues int, data []byte) error { // Decode decodes byte arrays into the slice provided and returns the number of values actually decoded func (d *DeltaByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { - max := utils.MinInt(len(out), d.nvals) + max := utils.Min(len(out), d.nvals) if max == 0 { return 0, nil } diff --git a/go/parquet/internal/encoding/delta_length_byte_array.go b/go/parquet/internal/encoding/delta_length_byte_array.go index d5a99c187d11e..183eb453ca0a3 100644 --- a/go/parquet/internal/encoding/delta_length_byte_array.go +++ b/go/parquet/internal/encoding/delta_length_byte_array.go @@ -126,7 +126,7 @@ func (d *DeltaLengthByteArrayDecoder) SetData(nvalues int, data []byte) error { // Decode populates the passed in slice with data decoded until it hits the length of out // or runs out of values in the column to decode, then returns the number of values actually decoded. 
func (d *DeltaLengthByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { - max := utils.MinInt(len(out), d.nvals) + max := utils.Min(len(out), d.nvals) for i := 0; i < max; i++ { out[i] = d.data[:d.lengths[i]:d.lengths[i]] d.data = d.data[d.lengths[i]:] diff --git a/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go index 1e589fc2e7be1..2054e1bb85f21 100644 --- a/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go +++ b/go/parquet/internal/encoding/fixed_len_byte_array_decoder.go @@ -38,7 +38,7 @@ func (PlainFixedLenByteArrayDecoder) Type() parquet.Type { // values to decode or the length of out has been filled. Then returns the total number of values // that were decoded. func (pflba *PlainFixedLenByteArrayDecoder) Decode(out []parquet.FixedLenByteArray) (int, error) { - max := utils.MinInt(len(out), pflba.nvals) + max := utils.Min(len(out), pflba.nvals) numBytesNeeded := max * pflba.typeLen if numBytesNeeded > len(pflba.data) || numBytesNeeded > math.MaxInt32 { return 0, xerrors.New("parquet: eof exception") diff --git a/go/parquet/internal/encoding/plain_encoder_types.gen.go b/go/parquet/internal/encoding/plain_encoder_types.gen.go index 09403d74cb06f..a41f754f62a88 100644 --- a/go/parquet/internal/encoding/plain_encoder_types.gen.go +++ b/go/parquet/internal/encoding/plain_encoder_types.gen.go @@ -172,7 +172,7 @@ func (PlainInt32Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. func (dec *PlainInt32Decoder) Decode(out []int32) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64(arrow.Int32SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain Int32, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) @@ -277,7 +277,7 @@ func (PlainInt64Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. func (dec *PlainInt64Decoder) Decode(out []int64) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64(arrow.Int64SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain Int64, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) @@ -382,7 +382,7 @@ func (PlainInt96Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. func (dec *PlainInt96Decoder) Decode(out []parquet.Int96) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64(parquet.Int96SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain Int96, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) @@ -487,7 +487,7 @@ func (PlainFloat32Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. 
func (dec *PlainFloat32Decoder) Decode(out []float32) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64(arrow.Float32SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain Float32, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) @@ -592,7 +592,7 @@ func (PlainFloat64Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. func (dec *PlainFloat64Decoder) Decode(out []float64) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64(arrow.Float64SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain Float64, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) diff --git a/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl b/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl index 2838c63a41857..74f63e78bccf3 100644 --- a/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl +++ b/go/parquet/internal/encoding/plain_encoder_types.gen.go.tmpl @@ -133,7 +133,7 @@ func (Plain{{.Name}}Decoder) Type() parquet.Type { // decoding the min(len(out), remaining values). // It returns the number of values actually decoded and any error encountered. func (dec *Plain{{.Name}}Decoder) Decode(out []{{.name}}) (int, error) { - max := utils.MinInt(len(out), dec.nvals) + max := utils.Min(len(out), dec.nvals) nbytes := int64(max) * int64({{.prefix}}.{{.Name}}SizeBytes) if nbytes > int64(len(dec.data)) || nbytes > math.MaxInt32 { return 0, fmt.Errorf("parquet: eof exception decode plain {{.Name}}, nvals: %d, nbytes: %d, datalen: %d", dec.nvals, nbytes, len(dec.data)) diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go b/go/parquet/internal/encoding/typed_encoder.gen.go index 4bc18e8c63c01..04db72178f3ee 100644 --- a/go/parquet/internal/encoding/typed_encoder.gen.go +++ b/go/parquet/internal/encoding/typed_encoder.gen.go @@ -195,7 +195,7 @@ func (DictInt32Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *DictInt32Decoder) Decode(out []int32) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -209,7 +209,7 @@ func (d *DictInt32Decoder) Decode(out []int32) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictInt32Decoder) DecodeSpaced(out []int32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -432,7 +432,7 @@ func (DictInt64Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. 
func (d *DictInt64Decoder) Decode(out []int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -446,7 +446,7 @@ func (d *DictInt64Decoder) Decode(out []int64) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictInt64Decoder) DecodeSpaced(out []int64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -647,7 +647,7 @@ func (DictInt96Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *DictInt96Decoder) Decode(out []parquet.Int96) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -661,7 +661,7 @@ func (d *DictInt96Decoder) Decode(out []parquet.Int96) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictInt96Decoder) DecodeSpaced(out []parquet.Int96, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -872,7 +872,7 @@ func (DictFloat32Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *DictFloat32Decoder) Decode(out []float32) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -886,7 +886,7 @@ func (d *DictFloat32Decoder) Decode(out []float32) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictFloat32Decoder) DecodeSpaced(out []float32, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -1097,7 +1097,7 @@ func (DictFloat64Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *DictFloat64Decoder) Decode(out []float64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -1111,7 +1111,7 @@ func (d *DictFloat64Decoder) Decode(out []float64) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. 
func (d *DictFloat64Decoder) DecodeSpaced(out []float64, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -1365,7 +1365,7 @@ func (DictByteArrayDecoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *DictByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -1379,7 +1379,7 @@ func (d *DictByteArrayDecoder) Decode(out []parquet.ByteArray) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictByteArrayDecoder) DecodeSpaced(out []parquet.ByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err @@ -1544,7 +1544,7 @@ func (DictFixedLenByteArrayDecoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *DictFixedLenByteArrayDecoder) Decode(out []parquet.FixedLenByteArray) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -1558,7 +1558,7 @@ func (d *DictFixedLenByteArrayDecoder) Decode(out []parquet.FixedLenByteArray) ( // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. func (d *DictFixedLenByteArrayDecoder) DecodeSpaced(out []parquet.FixedLenByteArray, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err diff --git a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl index d72f31512047a..ceb755caa0b46 100644 --- a/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl +++ b/go/parquet/internal/encoding/typed_encoder.gen.go.tmpl @@ -271,7 +271,7 @@ func (Dict{{.Name}}Decoder) Type() parquet.Type { // decoding using the dictionary to get the actual values. Returns the number of values // actually decoded and any error encountered. func (d *Dict{{.Name}}Decoder) Decode(out []{{.name}}) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decode(out[:vals]) if err != nil { return decoded, err @@ -285,7 +285,7 @@ func (d *Dict{{.Name}}Decoder) Decode(out []{{.name}}) (int, error) { // Decode spaced is like Decode but will space out the data leaving slots for null values // based on the provided bitmap. 
func (d *Dict{{.Name}}Decoder) DecodeSpaced(out []{{.name}}, nullCount int, validBits []byte, validBitsOffset int64) (int, error) { - vals := shared_utils.MinInt(len(out), d.nvals) + vals := shared_utils.Min(len(out), d.nvals) decoded, err := d.decodeSpaced(out[:vals], nullCount, validBits, validBitsOffset) if err != nil { return decoded, err diff --git a/go/parquet/internal/encoding/types.go b/go/parquet/internal/encoding/types.go index 4ab3ab1a1c954..f8d860c88a059 100644 --- a/go/parquet/internal/encoding/types.go +++ b/go/parquet/internal/encoding/types.go @@ -185,7 +185,7 @@ func (b *PooledBufferWriter) Reserve(nbytes int) { b.buf = bufferPool.Get().(*memory.Buffer) } - newCap := utils.MaxInt(b.buf.Cap()+b.offset, 256) + newCap := utils.Max(b.buf.Cap()+b.offset, 256) for newCap < b.pos+nbytes { newCap = bitutil.NextPowerOf2(newCap) } @@ -375,7 +375,7 @@ func (b *BufferWriter) Reserve(nbytes int) { if b.buffer == nil { b.buffer = memory.NewResizableBuffer(b.mem) } - newCap := utils.MaxInt(b.buffer.Cap()+b.offset, 256) + newCap := utils.Max(b.buffer.Cap()+b.offset, 256) for newCap < b.pos+nbytes+b.offset { newCap = bitutil.NextPowerOf2(newCap) } diff --git a/go/parquet/internal/testutils/pagebuilder.go b/go/parquet/internal/testutils/pagebuilder.go index 48ac331640087..525921d9631f9 100644 --- a/go/parquet/internal/testutils/pagebuilder.go +++ b/go/parquet/internal/testutils/pagebuilder.go @@ -75,7 +75,7 @@ func (d *DataPageBuilder) appendLevels(lvls []int16, maxLvl int16, e parquet.Enc func (d *DataPageBuilder) AppendDefLevels(lvls []int16, maxLvl int16) { d.defLvlBytesLen = d.appendLevels(lvls, maxLvl, parquet.Encodings.RLE) - d.nvals = utils.MaxInt(len(lvls), d.nvals) + d.nvals = utils.Max(len(lvls), d.nvals) d.defLvlEncoding = parquet.Encodings.RLE d.hasDefLvls = true } @@ -83,7 +83,7 @@ func (d *DataPageBuilder) AppendDefLevels(lvls []int16, maxLvl int16) { func (d *DataPageBuilder) AppendRepLevels(lvls []int16, maxLvl int16) { d.repLvlBytesLen = d.appendLevels(lvls, maxLvl, parquet.Encodings.RLE) - d.nvals = utils.MaxInt(len(lvls), d.nvals) + d.nvals = utils.Max(len(lvls), d.nvals) d.repLvlEncoding = parquet.Encodings.RLE d.hasRepLvls = true } @@ -122,7 +122,7 @@ func (d *DataPageBuilder) AppendValues(desc *schema.Column, values interface{}, panic(err) } - d.nvals = utils.MaxInt(sz, d.nvals) + d.nvals = utils.Max(sz, d.nvals) d.encoding = e d.hasValues = true } @@ -191,7 +191,7 @@ func MakeDataPage(dataPageVersion parquet.DataPageVersion, d *schema.Column, val num = builder.nvals } else { stream.Write(indexBuffer.Bytes()) - num = utils.MaxInt(builder.nvals, nvals) + num = utils.Max(builder.nvals, nvals) } buf := stream.Finish() diff --git a/go/parquet/internal/utils/bit_reader.go b/go/parquet/internal/utils/bit_reader.go index 0bf501e0488cf..d327be5f5253e 100644 --- a/go/parquet/internal/utils/bit_reader.go +++ b/go/parquet/internal/utils/bit_reader.go @@ -266,7 +266,7 @@ func (b *BitReader) GetBatchBools(out []bool) (int, error) { for i < length { // grab byte-aligned bits in a loop since it's more efficient than going // bit by bit when you can grab 8 bools at a time. 
- unpackSize := utils.MinInt(blen, length-i) / 8 * 8 + unpackSize := utils.Min(blen, length-i) / 8 * 8 n, err := b.reader.Read(buf[:bitutil.BytesForBits(int64(unpackSize))]) if err != nil { return i, err @@ -314,7 +314,7 @@ func (b *BitReader) GetBatch(bits uint, out []uint64) (int, error) { b.reader.Seek(b.byteoffset, io.SeekStart) for i < length { // unpack groups of 32 bytes at a time into a buffer since it's more efficient - unpackSize := utils.MinInt(buflen, length-i) + unpackSize := utils.Min(buflen, length-i) numUnpacked := unpack32(b.reader, b.unpackBuf[:unpackSize], int(bits)) if numUnpacked == 0 { break diff --git a/go/parquet/internal/utils/rle.go b/go/parquet/internal/utils/rle.go index f367e7dc13cee..dffe55402b95a 100644 --- a/go/parquet/internal/utils/rle.go +++ b/go/parquet/internal/utils/rle.go @@ -51,7 +51,7 @@ func MaxRLEBufferSize(width, numValues int) int { minRepeatedRunSize := 1 + int(bitutil.BytesForBits(int64(width))) repeatedMaxSize := int(bitutil.BytesForBits(int64(numValues))) * minRepeatedRunSize - return utils.MaxInt(literalMaxSize, repeatedMaxSize) + return utils.Max(literalMaxSize, repeatedMaxSize) } // Utility classes to do run length encoding (RLE) for fixed bit width values. If runs @@ -370,7 +370,7 @@ func (r *RleDecoder) consumeRepeatCounts(read, batchSize, remain int, run bituti } func (r *RleDecoder) consumeLiteralsUint64(dc DictionaryConverter, vals []uint64, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -388,7 +388,7 @@ func (r *RleDecoder) consumeLiteralsUint64(dc DictionaryConverter, vals []uint64 ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } diff --git a/go/parquet/internal/utils/typed_rle_dict.gen.go b/go/parquet/internal/utils/typed_rle_dict.gen.go index 886d24564db4b..37dc49a695806 100644 --- a/go/parquet/internal/utils/typed_rle_dict.gen.go +++ b/go/parquet/internal/utils/typed_rle_dict.gen.go @@ -130,7 +130,7 @@ func (r *RleDecoder) getspacedInt32(dc DictionaryConverter, vals []int32, batchS } func (r *RleDecoder) consumeLiteralsInt32(dc DictionaryConverter, vals []int32, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -148,7 +148,7 @@ func (r *RleDecoder) consumeLiteralsInt32(dc DictionaryConverter, vals []int32, ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -185,7 +185,7 @@ func (r *RleDecoder) GetBatchWithDictInt32(dc DictionaryConverter, vals []int32) if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -193,7 +193,7 @@ func (r *RleDecoder) GetBatchWithDictInt32(dc 
DictionaryConverter, vals []int32) read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -323,7 +323,7 @@ func (r *RleDecoder) getspacedInt64(dc DictionaryConverter, vals []int64, batchS } func (r *RleDecoder) consumeLiteralsInt64(dc DictionaryConverter, vals []int64, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -341,7 +341,7 @@ func (r *RleDecoder) consumeLiteralsInt64(dc DictionaryConverter, vals []int64, ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -378,7 +378,7 @@ func (r *RleDecoder) GetBatchWithDictInt64(dc DictionaryConverter, vals []int64) if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -386,7 +386,7 @@ func (r *RleDecoder) GetBatchWithDictInt64(dc DictionaryConverter, vals []int64) read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -516,7 +516,7 @@ func (r *RleDecoder) getspacedInt96(dc DictionaryConverter, vals []parquet.Int96 } func (r *RleDecoder) consumeLiteralsInt96(dc DictionaryConverter, vals []parquet.Int96, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -534,7 +534,7 @@ func (r *RleDecoder) consumeLiteralsInt96(dc DictionaryConverter, vals []parquet ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -571,7 +571,7 @@ func (r *RleDecoder) GetBatchWithDictInt96(dc DictionaryConverter, vals []parque if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -579,7 +579,7 @@ func (r *RleDecoder) GetBatchWithDictInt96(dc DictionaryConverter, vals []parque read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -709,7 +709,7 @@ func (r *RleDecoder) getspacedFloat32(dc DictionaryConverter, vals []float32, ba } func (r *RleDecoder) 
consumeLiteralsFloat32(dc DictionaryConverter, vals []float32, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -727,7 +727,7 @@ func (r *RleDecoder) consumeLiteralsFloat32(dc DictionaryConverter, vals []float ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -764,7 +764,7 @@ func (r *RleDecoder) GetBatchWithDictFloat32(dc DictionaryConverter, vals []floa if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -772,7 +772,7 @@ func (r *RleDecoder) GetBatchWithDictFloat32(dc DictionaryConverter, vals []floa read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -902,7 +902,7 @@ func (r *RleDecoder) getspacedFloat64(dc DictionaryConverter, vals []float64, ba } func (r *RleDecoder) consumeLiteralsFloat64(dc DictionaryConverter, vals []float64, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -920,7 +920,7 @@ func (r *RleDecoder) consumeLiteralsFloat64(dc DictionaryConverter, vals []float ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -957,7 +957,7 @@ func (r *RleDecoder) GetBatchWithDictFloat64(dc DictionaryConverter, vals []floa if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -965,7 +965,7 @@ func (r *RleDecoder) GetBatchWithDictFloat64(dc DictionaryConverter, vals []floa read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -1095,7 +1095,7 @@ func (r *RleDecoder) getspacedByteArray(dc DictionaryConverter, vals []parquet.B } func (r *RleDecoder) consumeLiteralsByteArray(dc DictionaryConverter, vals []parquet.ByteArray, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -1113,7 +1113,7 @@ func (r *RleDecoder) consumeLiteralsByteArray(dc 
DictionaryConverter, vals []par ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -1150,7 +1150,7 @@ func (r *RleDecoder) GetBatchWithDictByteArray(dc DictionaryConverter, vals []pa if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -1158,7 +1158,7 @@ func (r *RleDecoder) GetBatchWithDictByteArray(dc DictionaryConverter, vals []pa read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { @@ -1288,7 +1288,7 @@ func (r *RleDecoder) getspacedFixedLenByteArray(dc DictionaryConverter, vals []p } func (r *RleDecoder) consumeLiteralsFixedLenByteArray(dc DictionaryConverter, vals []parquet.FixedLenByteArray, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -1306,7 +1306,7 @@ func (r *RleDecoder) consumeLiteralsFixedLenByteArray(dc DictionaryConverter, va ) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -1343,7 +1343,7 @@ func (r *RleDecoder) GetBatchWithDictFixedLenByteArray(dc DictionaryConverter, v if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -1351,7 +1351,7 @@ func (r *RleDecoder) GetBatchWithDictFixedLenByteArray(dc DictionaryConverter, v read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { diff --git a/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl b/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl index abcb419055a92..88c7dd979ebf1 100644 --- a/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl +++ b/go/parquet/internal/utils/typed_rle_dict.gen.go.tmpl @@ -129,7 +129,7 @@ func (r *RleDecoder) getspaced{{.Name}}(dc DictionaryConverter, vals []{{.name}} } func (r *RleDecoder) consumeLiterals{{.Name}}(dc DictionaryConverter, vals []{{.name}}, remain int, buf []IndexType, run bitutils.BitRun, bitRdr bitutils.BitRunReader) (int, int, bitutils.BitRun, error) { - batch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), len(buf)) + batch := utils.Min(utils.Min(remain, int(r.litCount)), len(buf)) buf = buf[:batch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) @@ -147,7 +147,7 @@ func (r *RleDecoder) consumeLiterals{{.Name}}(dc DictionaryConverter, vals []{{. 
) for read < batch { if run.Set { - updateSize := utils.MinInt(batch-read, int(run.Len)) + updateSize := utils.Min(batch-read, int(run.Len)) if err := dc.Copy(vals, buf[read:read+updateSize]); err != nil { return 0, 0, run, err } @@ -184,7 +184,7 @@ func (r *RleDecoder) GetBatchWithDict{{.Name}}(dc DictionaryConverter, vals []{{ if !dc.IsValid(idx) { return read, nil } - batch := utils.MinInt(remain, int(r.repCount)) + batch := utils.Min(remain, int(r.repCount)) if err := dc.Fill(vals[:batch], idx); err != nil { return read, err } @@ -192,7 +192,7 @@ func (r *RleDecoder) GetBatchWithDict{{.Name}}(dc DictionaryConverter, vals []{{ read += batch vals = vals[batch:] case r.litCount > 0: - litbatch := utils.MinInt(utils.MinInt(remain, int(r.litCount)), 1024) + litbatch := utils.Min(utils.Min(remain, int(r.litCount)), 1024) buf := indexbuffer[:litbatch] n, _ := r.r.GetBatchIndex(uint(r.bitWidth), buf) if n != litbatch { diff --git a/go/parquet/pqarrow/column_readers.go b/go/parquet/pqarrow/column_readers.go index 3c38aba5c32a6..a403b2196a80c 100644 --- a/go/parquet/pqarrow/column_readers.go +++ b/go/parquet/pqarrow/column_readers.go @@ -790,7 +790,7 @@ func bigEndianToDecimal128(buf []byte) (decimal128.Num, error) { isNeg := int8(buf[0]) < 0 // 1. extract high bits - highBitsOffset := utils.MaxInt(0, len(buf)-8) + highBitsOffset := utils.Max(0, len(buf)-8) var ( highBits uint64 lowBits uint64 @@ -811,7 +811,7 @@ func bigEndianToDecimal128(buf []byte) (decimal128.Num, error) { } // 2. extract lower bits - lowBitsOffset := utils.MinInt(len(buf), 8) + lowBitsOffset := utils.Min(len(buf), 8) lowBits = uint64FromBigEndianShifted(buf[highBitsOffset:]) if lowBitsOffset == 8 { @@ -850,7 +850,7 @@ func bigEndianToDecimal256(buf []byte) (decimal256.Num, error) { } for wordIdx := 0; wordIdx < 4; wordIdx++ { - wordLen := utils.MinInt(len(buf), arrow.Uint64SizeBytes) + wordLen := utils.Min(len(buf), arrow.Uint64SizeBytes) word := buf[len(buf)-wordLen:] if wordLen == 8 { From 6c326db6a5686a78bc77be662b61236ddbfc66dc Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Tue, 19 Dec 2023 19:58:29 +0100 Subject: [PATCH 073/570] GH-33984: [C++][Python] DLPack implementation for Arrow Arrays (producer) (#38472) ### Rationale for this change DLPack is selected for Array API protocol so it is important to have it implemented for Arrow/PyArrow Arrays also. This is possible for primitive type arrays (int, uint and float) with no validity buffer. Device support is not in scope of this PR (CPU only). ### What changes are included in this PR? - `ExportArray` and `ExportDevice` methods on Arrow C++ Arrays - `__dlpack__` method on the base PyArrow Array class exposing `ExportArray` method - `__dlpack_device__` method on the base PyArrow Array class exposing `ExportDevice` method ### Are these changes tested? Yes, tests are added to `dlpack_test.cc` and `test_array.py`. ### Are there any user-facing changes? No. 
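### Example usage (illustrative)

As a rough sketch only — NumPy is assumed here as the DLPack consumer (`np.from_dlpack` requires NumPy >= 1.22 and is not part of this change) — the new protocol methods can be exercised along these lines:

```python
import numpy as np
import pyarrow as pa

# Only primitive (integer/floating-point) arrays with no validity buffer are exportable.
arr = pa.array([1, 2, 3], type=pa.int64())

# __dlpack_device__ reports (device_type, device_id); CPU is (1, 0), i.e. kDLCPU.
print(arr.__dlpack_device__())

# Any DLPack-aware consumer can then import the data zero-copy via __dlpack__,
# for example NumPy:
np_arr = np.from_dlpack(arr)
print(np_arr)  # -> [1 2 3]
```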
* Closes: #33984 Lead-authored-by: AlenkaF Co-authored-by: Alenka Frim Co-authored-by: Antoine Pitrou Co-authored-by: Joris Van den Bossche Signed-off-by: Antoine Pitrou --- cpp/src/arrow/CMakeLists.txt | 1 + cpp/src/arrow/c/CMakeLists.txt | 1 + cpp/src/arrow/c/dlpack.cc | 133 ++++++++ cpp/src/arrow/c/dlpack.h | 51 ++++ cpp/src/arrow/c/dlpack_abi.h | 321 ++++++++++++++++++++ cpp/src/arrow/c/dlpack_test.cc | 129 ++++++++ dev/release/rat_exclude_files.txt | 1 + docs/source/python/dlpack.rst | 93 ++++++ docs/source/python/index.rst | 1 + docs/source/python/interchange_protocol.rst | 6 +- python/pyarrow/_dlpack.pxi | 46 +++ python/pyarrow/array.pxi | 38 +++ python/pyarrow/includes/libarrow.pxd | 19 ++ python/pyarrow/lib.pyx | 3 + python/pyarrow/tests/test_dlpack.py | 142 +++++++++ 15 files changed, 982 insertions(+), 3 deletions(-) create mode 100644 cpp/src/arrow/c/dlpack.cc create mode 100644 cpp/src/arrow/c/dlpack.h create mode 100644 cpp/src/arrow/c/dlpack_abi.h create mode 100644 cpp/src/arrow/c/dlpack_test.cc create mode 100644 docs/source/python/dlpack.rst create mode 100644 python/pyarrow/_dlpack.pxi create mode 100644 python/pyarrow/tests/test_dlpack.py diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 46a7aa910633d..00947c6275678 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -192,6 +192,7 @@ set(ARROW_SRCS type_traits.cc visitor.cc c/bridge.cc + c/dlpack.cc io/buffered.cc io/caching.cc io/compressed.cc diff --git a/cpp/src/arrow/c/CMakeLists.txt b/cpp/src/arrow/c/CMakeLists.txt index 3765477ba09cd..81a81cd3f1103 100644 --- a/cpp/src/arrow/c/CMakeLists.txt +++ b/cpp/src/arrow/c/CMakeLists.txt @@ -16,6 +16,7 @@ # under the License. add_arrow_test(bridge_test PREFIX "arrow-c") +add_arrow_test(dlpack_test) add_arrow_benchmark(bridge_benchmark) diff --git a/cpp/src/arrow/c/dlpack.cc b/cpp/src/arrow/c/dlpack.cc new file mode 100644 index 0000000000000..13ee2761b0c11 --- /dev/null +++ b/cpp/src/arrow/c/dlpack.cc @@ -0,0 +1,133 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/c/dlpack.h" + +#include "arrow/array/array_base.h" +#include "arrow/c/dlpack_abi.h" +#include "arrow/device.h" +#include "arrow/type.h" +#include "arrow/type_traits.h" + +namespace arrow::dlpack { + +namespace { + +Result GetDLDataType(const DataType& type) { + DLDataType dtype; + dtype.lanes = 1; + dtype.bits = type.bit_width(); + switch (type.id()) { + case Type::INT8: + case Type::INT16: + case Type::INT32: + case Type::INT64: + dtype.code = DLDataTypeCode::kDLInt; + return dtype; + case Type::UINT8: + case Type::UINT16: + case Type::UINT32: + case Type::UINT64: + dtype.code = DLDataTypeCode::kDLUInt; + return dtype; + case Type::HALF_FLOAT: + case Type::FLOAT: + case Type::DOUBLE: + dtype.code = DLDataTypeCode::kDLFloat; + return dtype; + case Type::BOOL: + // DLPack supports byte-packed boolean values + return Status::TypeError("Bit-packed boolean data type not supported by DLPack."); + default: + return Status::TypeError("DataType is not compatible with DLPack spec: ", + type.ToString()); + } +} + +struct ManagerCtx { + std::shared_ptr array; + DLManagedTensor tensor; +}; + +} // namespace + +Result ExportArray(const std::shared_ptr& arr) { + // Define DLDevice struct nad check if array type is supported + // by the DLPack protocol at the same time. Raise TypeError if not. + // Supported data types: int, uint, float with no validity buffer. + ARROW_ASSIGN_OR_RAISE(auto device, ExportDevice(arr)) + + // Define the DLDataType struct + const DataType& type = *arr->type(); + std::shared_ptr data = arr->data(); + ARROW_ASSIGN_OR_RAISE(auto dlpack_type, GetDLDataType(type)); + + // Create ManagerCtx that will serve as the owner of the DLManagedTensor + std::unique_ptr ctx(new ManagerCtx); + + // Define the data pointer to the DLTensor + // If array is of length 0, data pointer should be NULL + if (arr->length() == 0) { + ctx->tensor.dl_tensor.data = NULL; + } else { + const auto data_offset = data->offset * type.byte_width(); + ctx->tensor.dl_tensor.data = + const_cast(data->buffers[1]->data() + data_offset); + } + + ctx->tensor.dl_tensor.device = device; + ctx->tensor.dl_tensor.ndim = 1; + ctx->tensor.dl_tensor.dtype = dlpack_type; + ctx->tensor.dl_tensor.shape = const_cast(&data->length); + ctx->tensor.dl_tensor.strides = NULL; + ctx->tensor.dl_tensor.byte_offset = 0; + + ctx->array = std::move(data); + ctx->tensor.manager_ctx = ctx.get(); + ctx->tensor.deleter = [](struct DLManagedTensor* self) { + delete reinterpret_cast(self->manager_ctx); + }; + return &ctx.release()->tensor; +} + +Result ExportDevice(const std::shared_ptr& arr) { + // Check if array is supported by the DLPack protocol. 
+ if (arr->null_count() > 0) { + return Status::TypeError("Can only use DLPack on arrays with no nulls."); + } + const DataType& type = *arr->type(); + if (type.id() == Type::BOOL) { + return Status::TypeError("Bit-packed boolean data type not supported by DLPack."); + } + if (!is_integer(type.id()) && !is_floating(type.id())) { + return Status::TypeError("DataType is not compatible with DLPack spec: ", + type.ToString()); + } + + // Define DLDevice struct + DLDevice device; + if (arr->data()->buffers[1]->device_type() == DeviceAllocationType::kCPU) { + device.device_id = 0; + device.device_type = DLDeviceType::kDLCPU; + return device; + } else { + return Status::NotImplemented( + "DLPack support is implemented only for buffers on CPU device."); + } +} + +} // namespace arrow::dlpack diff --git a/cpp/src/arrow/c/dlpack.h b/cpp/src/arrow/c/dlpack.h new file mode 100644 index 0000000000000..d11ccfc1fd722 --- /dev/null +++ b/cpp/src/arrow/c/dlpack.h @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/array/array_base.h" +#include "arrow/c/dlpack_abi.h" + +namespace arrow::dlpack { + +/// \brief Export Arrow array as DLPack tensor. +/// +/// DLMangedTensor is produced as defined by the DLPack protocol, +/// see https://dmlc.github.io/dlpack/latest/. +/// +/// Data types for which the protocol is supported are +/// integer and floating-point data types. +/// +/// DLPack protocol only supports arrays with one contiguous +/// memory region which means Arrow Arrays with validity buffers +/// are not supported. +/// +/// \param[in] arr Arrow array +/// \return DLManagedTensor struct +ARROW_EXPORT +Result ExportArray(const std::shared_ptr& arr); + +/// \brief Get DLDevice with enumerator specifying the +/// type of the device data is stored on and index of the +/// device which is 0 by default for CPU. +/// +/// \param[in] arr Arrow array +/// \return DLDevice struct +ARROW_EXPORT +Result ExportDevice(const std::shared_ptr& arr); + +} // namespace arrow::dlpack diff --git a/cpp/src/arrow/c/dlpack_abi.h b/cpp/src/arrow/c/dlpack_abi.h new file mode 100644 index 0000000000000..4af557a7ed5d7 --- /dev/null +++ b/cpp/src/arrow/c/dlpack_abi.h @@ -0,0 +1,321 @@ +// Taken from: +// https://github.com/dmlc/dlpack/blob/ca4d00ad3e2e0f410eeab3264d21b8a39397f362/include/dlpack/dlpack.h +/*! + * Copyright (c) 2017 by Contributors + * \file dlpack.h + * \brief The common header of DLPack. + */ +#ifndef DLPACK_DLPACK_H_ +#define DLPACK_DLPACK_H_ + +/** + * \brief Compatibility with C++ + */ +#ifdef __cplusplus +#define DLPACK_EXTERN_C extern "C" +#else +#define DLPACK_EXTERN_C +#endif + +/*! \brief The current major version of dlpack */ +#define DLPACK_MAJOR_VERSION 1 + +/*! 
\brief The current minor version of dlpack */ +#define DLPACK_MINOR_VERSION 0 + +/*! \brief DLPACK_DLL prefix for windows */ +#ifdef _WIN32 +#ifdef DLPACK_EXPORTS +#define DLPACK_DLL __declspec(dllexport) +#else +#define DLPACK_DLL __declspec(dllimport) +#endif +#else +#define DLPACK_DLL +#endif + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/*! + * \brief The DLPack version. + * + * A change in major version indicates that we have changed the + * data layout of the ABI - DLManagedTensorVersioned. + * + * A change in minor version indicates that we have added new + * code, such as a new device type, but the ABI is kept the same. + * + * If an obtained DLPack tensor has a major version that disagrees + * with the version number specified in this header file + * (i.e. major != DLPACK_MAJOR_VERSION), the consumer must call the deleter + * (and it is safe to do so). It is not safe to access any other fields + * as the memory layout will have changed. + * + * In the case of a minor version mismatch, the tensor can be safely used as + * long as the consumer knows how to interpret all fields. Minor version + * updates indicate the addition of enumeration values. + */ +typedef struct { + /*! \brief DLPack major version. */ + uint32_t major; + /*! \brief DLPack minor version. */ + uint32_t minor; +} DLPackVersion; + +/*! + * \brief The device type in DLDevice. + */ +#ifdef __cplusplus +typedef enum : int32_t { +#else +typedef enum { +#endif + /*! \brief CPU device */ + kDLCPU = 1, + /*! \brief CUDA GPU device */ + kDLCUDA = 2, + /*! + * \brief Pinned CUDA CPU memory by cudaMallocHost + */ + kDLCUDAHost = 3, + /*! \brief OpenCL devices. */ + kDLOpenCL = 4, + /*! \brief Vulkan buffer for next generation graphics. */ + kDLVulkan = 7, + /*! \brief Metal for Apple GPU. */ + kDLMetal = 8, + /*! \brief Verilog simulator buffer */ + kDLVPI = 9, + /*! \brief ROCm GPUs for AMD GPUs */ + kDLROCM = 10, + /*! + * \brief Pinned ROCm CPU memory allocated by hipMallocHost + */ + kDLROCMHost = 11, + /*! + * \brief Reserved extension device type, + * used for quickly test extension device + * The semantics can differ depending on the implementation. + */ + kDLExtDev = 12, + /*! + * \brief CUDA managed/unified memory allocated by cudaMallocManaged + */ + kDLCUDAManaged = 13, + /*! + * \brief Unified shared memory allocated on a oneAPI non-partititioned + * device. Call to oneAPI runtime is required to determine the device + * type, the USM allocation type and the sycl context it is bound to. + * + */ + kDLOneAPI = 14, + /*! \brief GPU support for next generation WebGPU standard. */ + kDLWebGPU = 15, + /*! \brief Qualcomm Hexagon DSP */ + kDLHexagon = 16, +} DLDeviceType; + +/*! + * \brief A Device for Tensor and operator. + */ +typedef struct { + /*! \brief The device type used in the device. */ + DLDeviceType device_type; + /*! + * \brief The device index. + * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0. + */ + int32_t device_id; +} DLDevice; + +/*! + * \brief The type code options DLDataType. + */ +typedef enum { + /*! \brief signed integer */ + kDLInt = 0U, + /*! \brief unsigned integer */ + kDLUInt = 1U, + /*! \brief IEEE floating point */ + kDLFloat = 2U, + /*! + * \brief Opaque handle type, reserved for testing purposes. + * Frameworks need to agree on the handle data type for the exchange to be well-defined. + */ + kDLOpaqueHandle = 3U, + /*! \brief bfloat16 */ + kDLBfloat = 4U, + /*! 
+ * \brief complex number + * (C/C++/Python layout: compact struct per complex number) + */ + kDLComplex = 5U, + /*! \brief boolean */ + kDLBool = 6U, +} DLDataTypeCode; + +/*! + * \brief The data type the tensor can hold. The data type is assumed to follow the + * native endian-ness. An explicit error message should be raised when attempting to + * export an array with non-native endianness + * + * Examples + * - float: type_code = 2, bits = 32, lanes = 1 + * - float4(vectorized 4 float): type_code = 2, bits = 32, lanes = 4 + * - int8: type_code = 0, bits = 8, lanes = 1 + * - std::complex: type_code = 5, bits = 64, lanes = 1 + * - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library convention, + * the underlying storage size of bool is 8 bits) + */ +typedef struct { + /*! + * \brief Type code of base types. + * We keep it uint8_t instead of DLDataTypeCode for minimal memory + * footprint, but the value should be one of DLDataTypeCode enum values. + * */ + uint8_t code; + /*! + * \brief Number of bits, common choices are 8, 16, 32. + */ + uint8_t bits; + /*! \brief Number of lanes in the type, used for vector types. */ + uint16_t lanes; +} DLDataType; + +/*! + * \brief Plain C Tensor object, does not manage memory. + */ +typedef struct { + /*! + * \brief The data pointer points to the allocated data. This will be CUDA + * device pointer or cl_mem handle in OpenCL. It may be opaque on some device + * types. This pointer is always aligned to 256 bytes as in CUDA. The + * `byte_offset` field should be used to point to the beginning of the data. + * + * Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow, + * TVM, perhaps others) do not adhere to this 256 byte aligment requirement + * on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed + * (after which this note will be updated); at the moment it is recommended + * to not rely on the data pointer being correctly aligned. + * + * For given DLTensor, the size of memory required to store the contents of + * data is calculated as follows: + * + * \code{.c} + * static inline size_t GetDataSize(const DLTensor* t) { + * size_t size = 1; + * for (tvm_index_t i = 0; i < t->ndim; ++i) { + * size *= t->shape[i]; + * } + * size *= (t->dtype.bits * t->dtype.lanes + 7) / 8; + * return size; + * } + * \endcode + */ + void* data; + /*! \brief The device of the tensor */ + DLDevice device; + /*! \brief Number of dimensions */ + int32_t ndim; + /*! \brief The data type of the pointer*/ + DLDataType dtype; + /*! \brief The shape of the tensor */ + int64_t* shape; + /*! + * \brief strides of the tensor (in number of elements, not bytes) + * can be NULL, indicating tensor is compact and row-majored. + */ + int64_t* strides; + /*! \brief The offset in bytes to the beginning pointer to data */ + uint64_t byte_offset; +} DLTensor; + +/*! + * \brief C Tensor object, manage memory of DLTensor. This data structure is + * intended to facilitate the borrowing of DLTensor by another framework. It is + * not meant to transfer the tensor. When the borrowing framework doesn't need + * the tensor, it should call the deleter to notify the host that the resource + * is no longer needed. + * + * \note This data structure is used as Legacy DLManagedTensor + * in DLPack exchange and is deprecated after DLPack v0.8 + * Use DLManagedTensorVersioned instead. + * This data structure may get renamed or deleted in future versions. + * + * \sa DLManagedTensorVersioned + */ +typedef struct DLManagedTensor { + /*! 
\brief DLTensor which is being memory managed */ + DLTensor dl_tensor; + /*! \brief the context of the original host framework of DLManagedTensor in + * which DLManagedTensor is used in the framework. It can also be NULL. + */ + void* manager_ctx; + /*! + * \brief Destructor - this should be called + * to destruct the manager_ctx which backs the DLManagedTensor. It can be + * NULL if there is no way for the caller to provide a reasonable destructor. + * The destructors deletes the argument self as well. + */ + void (*deleter)(struct DLManagedTensor* self); +} DLManagedTensor; + +// bit masks used in in the DLManagedTensorVersioned + +/*! \brief bit mask to indicate that the tensor is read only. */ +#define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL) + +/*! + * \brief A versioned and managed C Tensor object, manage memory of DLTensor. + * + * This data structure is intended to facilitate the borrowing of DLTensor by + * another framework. It is not meant to transfer the tensor. When the borrowing + * framework doesn't need the tensor, it should call the deleter to notify the + * host that the resource is no longer needed. + * + * \note This is the current standard DLPack exchange data structure. + */ +struct DLManagedTensorVersioned { + /*! + * \brief The API and ABI version of the current managed Tensor + */ + DLPackVersion version; + /*! + * \brief the context of the original host framework. + * + * Stores DLManagedTensorVersioned is used in the + * framework. It can also be NULL. + */ + void* manager_ctx; + /*! + * \brief Destructor. + * + * This should be called to destruct manager_ctx which holds the + * DLManagedTensorVersioned. It can be NULL if there is no way for the caller to provide + * a reasonable destructor. The destructors deletes the argument self as well. + */ + void (*deleter)(struct DLManagedTensorVersioned* self); + /*! + * \brief Additional bitmask flags information about the tensor. + * + * By default the flags should be set to 0. + * + * \note Future ABI changes should keep everything until this field + * stable, to ensure that deleter can be correctly called. + * + * \sa DLPACK_FLAG_BITMASK_READ_ONLY + */ + uint64_t flags; + /*! \brief DLTensor which is being memory managed */ + DLTensor dl_tensor; +}; + +#ifdef __cplusplus +} // DLPACK_EXTERN_C +#endif +#endif // DLPACK_DLPACK_H_ diff --git a/cpp/src/arrow/c/dlpack_test.cc b/cpp/src/arrow/c/dlpack_test.cc new file mode 100644 index 0000000000000..3136506bf39ad --- /dev/null +++ b/cpp/src/arrow/c/dlpack_test.cc @@ -0,0 +1,129 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
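Editor's note: the DLManagedTensor layout and deleter contract defined in this vendored header are what a consumer of the new arrow::dlpack::ExportArray API is expected to honor. As a minimal illustration (not part of the patch; ConsumeInt64Tensor is a hypothetical helper and assumes a CPU-resident int64 array), a borrower could read the exported values and then return ownership by calling the deleter:

#include <cstdint>
#include <iostream>
#include <memory>

#include "arrow/array.h"
#include "arrow/c/dlpack.h"
#include "arrow/c/dlpack_abi.h"
#include "arrow/result.h"
#include "arrow/status.h"

// Hypothetical consumer: borrow an int64 Arrow array through DLPack, read the
// values, then hand the memory back by calling the producer's deleter.
arrow::Status ConsumeInt64Tensor(const std::shared_ptr<arrow::Array>& array) {
  ARROW_ASSIGN_OR_RAISE(DLManagedTensor* managed, arrow::dlpack::ExportArray(array));
  const DLTensor& tensor = managed->dl_tensor;
  if (tensor.ndim == 1 && tensor.dtype.code == kDLInt && tensor.dtype.bits == 64) {
    // ExportArray sets byte_offset to 0, so data points directly at the values.
    const auto* values = static_cast<const int64_t*>(tensor.data);
    for (int64_t i = 0; i < tensor.shape[0]; ++i) {
      std::cout << values[i] << " ";
    }
    std::cout << std::endl;
  }
  // Per the protocol, the borrower must call the deleter when it is done
  // (a deleter may be NULL if the producer has nothing to release).
  if (managed->deleter != nullptr) {
    managed->deleter(managed);
  }
  return arrow::Status::OK();
}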
+
+#include <gtest/gtest.h>
+
+#include "arrow/array/array_base.h"
+#include "arrow/c/dlpack.h"
+#include "arrow/c/dlpack_abi.h"
+#include "arrow/memory_pool.h"
+#include "arrow/testing/gtest_util.h"
+
+namespace arrow::dlpack {
+
+class TestExportArray : public ::testing::Test {
+ public:
+  void SetUp() {}
+};
+
+void CheckDLTensor(const std::shared_ptr<Array>& arr,
+                   const std::shared_ptr<DataType>& arrow_type,
+                   DLDataTypeCode dlpack_type, int64_t length) {
+  ASSERT_OK_AND_ASSIGN(auto dlmtensor, arrow::dlpack::ExportArray(arr));
+  auto dltensor = dlmtensor->dl_tensor;
+
+  const auto byte_width = arr->type()->byte_width();
+  const auto start = arr->offset() * byte_width;
+  ASSERT_OK_AND_ASSIGN(auto sliced_buffer,
+                       SliceBufferSafe(arr->data()->buffers[1], start));
+  ASSERT_EQ(sliced_buffer->data(), dltensor.data);
+
+  ASSERT_EQ(0, dltensor.byte_offset);
+  ASSERT_EQ(NULL, dltensor.strides);
+  ASSERT_EQ(length, dltensor.shape[0]);
+  ASSERT_EQ(1, dltensor.ndim);
+
+  ASSERT_EQ(dlpack_type, dltensor.dtype.code);
+
+  ASSERT_EQ(arrow_type->bit_width(), dltensor.dtype.bits);
+  ASSERT_EQ(1, dltensor.dtype.lanes);
+  ASSERT_EQ(DLDeviceType::kDLCPU, dltensor.device.device_type);
+  ASSERT_EQ(0, dltensor.device.device_id);
+
+  ASSERT_OK_AND_ASSIGN(auto device, arrow::dlpack::ExportDevice(arr));
+  ASSERT_EQ(DLDeviceType::kDLCPU, device.device_type);
+  ASSERT_EQ(0, device.device_id);
+
+  dlmtensor->deleter(dlmtensor);
+}
+
+TEST_F(TestExportArray, TestSupportedArray) {
+  const std::vector<std::pair<std::shared_ptr<DataType>, DLDataTypeCode>> cases = {
+      {int8(), DLDataTypeCode::kDLInt},
+      {uint8(), DLDataTypeCode::kDLUInt},
+      {
+          int16(),
+          DLDataTypeCode::kDLInt,
+      },
+      {uint16(), DLDataTypeCode::kDLUInt},
+      {
+          int32(),
+          DLDataTypeCode::kDLInt,
+      },
+      {uint32(), DLDataTypeCode::kDLUInt},
+      {
+          int64(),
+          DLDataTypeCode::kDLInt,
+      },
+      {uint64(), DLDataTypeCode::kDLUInt},
+      {float16(), DLDataTypeCode::kDLFloat},
+      {float32(), DLDataTypeCode::kDLFloat},
+      {float64(), DLDataTypeCode::kDLFloat}};
+
+  const auto allocated_bytes = arrow::default_memory_pool()->bytes_allocated();
+
+  for (auto [arrow_type, dlpack_type] : cases) {
+    const std::shared_ptr<Array> array =
+        ArrayFromJSON(arrow_type, "[1, 0, 10, 0, 2, 1, 3, 5, 1, 0]");
+    CheckDLTensor(array, arrow_type, dlpack_type, 10);
+    ASSERT_OK_AND_ASSIGN(auto sliced_1, array->SliceSafe(1, 5));
+    CheckDLTensor(sliced_1, arrow_type, dlpack_type, 5);
+    ASSERT_OK_AND_ASSIGN(auto sliced_2, array->SliceSafe(0, 5));
+    CheckDLTensor(sliced_2, arrow_type, dlpack_type, 5);
+    ASSERT_OK_AND_ASSIGN(auto sliced_3, array->SliceSafe(3));
+    CheckDLTensor(sliced_3, arrow_type, dlpack_type, 7);
+  }
+
+  ASSERT_EQ(allocated_bytes, arrow::default_memory_pool()->bytes_allocated());
+}
+
+TEST_F(TestExportArray, TestErrors) {
+  const std::shared_ptr<Array> array_null = ArrayFromJSON(null(), "[]");
+  ASSERT_RAISES_WITH_MESSAGE(TypeError,
+                             "Type error: DataType is not compatible with DLPack spec: " +
+                                 array_null->type()->ToString(),
+                             arrow::dlpack::ExportArray(array_null));
+
+  const std::shared_ptr<Array> array_with_null = ArrayFromJSON(int8(), "[1, 100, null]");
+  ASSERT_RAISES_WITH_MESSAGE(TypeError,
+                             "Type error: Can only use DLPack on arrays with no nulls.",
+                             arrow::dlpack::ExportArray(array_with_null));
+
+  const std::shared_ptr<Array> array_string =
+      ArrayFromJSON(utf8(), R"(["itsy", "bitsy", "spider"])");
+  ASSERT_RAISES_WITH_MESSAGE(TypeError,
+                             "Type error: DataType is not compatible with DLPack spec: " +
+                                 array_string->type()->ToString(),
+                             arrow::dlpack::ExportArray(array_string));
+
+  const std::shared_ptr<Array> array_boolean = ArrayFromJSON(boolean(), "[true,
false]"); + ASSERT_RAISES_WITH_MESSAGE( + TypeError, "Type error: Bit-packed boolean data type not supported by DLPack.", + arrow::dlpack::ExportDevice(array_boolean)); +} + +} // namespace arrow::dlpack diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index ce637bf839232..4f86a12afe4fb 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -12,6 +12,7 @@ ci/etc/*.patch ci/vcpkg/*.patch CHANGELOG.md cpp/CHANGELOG_PARQUET.md +cpp/src/arrow/c/dlpack_abi.h cpp/src/arrow/io/mman.h cpp/src/arrow/util/random.h cpp/src/arrow/status.cc diff --git a/docs/source/python/dlpack.rst b/docs/source/python/dlpack.rst new file mode 100644 index 0000000000000..f612ebabde5c9 --- /dev/null +++ b/docs/source/python/dlpack.rst @@ -0,0 +1,93 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _pyarrow-dlpack: + +The DLPack Protocol +=================== + +`The DLPack Protocol `_ +is a stable in-memory data structure that allows exchange +between major frameworks working with multidimensional +arrays or tensors. It is designed for cross hardware +support meaning it allows exchange of data on devices other +than the CPU (e.g. GPU). + +DLPack protocol had been +`selected as the Python array API standard `_ +by the +`Consortium for Python Data API Standards `_ +in order to enable device aware data interchange between array/tensor +libraries in the Python ecosystem. See more about the standard +in the +`protocol documentation `_ +and more about DLPack in the +`Python Specification for DLPack `_. + +Implementation of DLPack in PyArrow +----------------------------------- + +The producing side of the DLPack Protocol is implemented for ``pa.Array`` +and can be used to interchange data between PyArrow and other tensor +libraries. Supported data types are integer, unsigned integer and float. The +protocol has no missing data support meaning PyArrow arrays with +missing values cannot be transferred through the DLPack +protocol. Currently, the Arrow implementation of the protocol only supports +data on a CPU device. + +Data interchange syntax of the protocol includes + +1. ``from_dlpack(x)``: consuming an array object that implements a + ``__dlpack__`` method and creating a new array while sharing the + memory. + +2. ``__dlpack__(self, stream=None)`` and ``__dlpack_device__``: + producing a PyCapsule with the DLPack struct which is called from + within ``from_dlpack(x)``. + +PyArrow implements the second part of the protocol +(``__dlpack__(self, stream=None)`` and ``__dlpack_device__``) and can +thus be consumed by libraries implementing ``from_dlpack``. + +Example +------- + +Convert a PyArrow CPU array to NumPy array: + +.. 
code-block:: + + >>> import pyarrow as pa + >>> array = pa.array([2, 0, 2, 4]) + + [ + 2, + 0, + 2, + 4 + ] + + >>> import numpy as np + >>> np.from_dlpack(array) + array([2, 0, 2, 4]) + +Convert a PyArrow CPU array to PyTorch tensor: + +.. code-block:: + + >>> import torch + >>> torch.from_dlpack(array) + tensor([2, 0, 2, 4]) diff --git a/docs/source/python/index.rst b/docs/source/python/index.rst index 6a3de3d42b149..08939bc760df6 100644 --- a/docs/source/python/index.rst +++ b/docs/source/python/index.rst @@ -53,6 +53,7 @@ files into Arrow structures. numpy pandas interchange_protocol + dlpack timestamps orc csv diff --git a/docs/source/python/interchange_protocol.rst b/docs/source/python/interchange_protocol.rst index c354541a6779c..2a5ec8afede7b 100644 --- a/docs/source/python/interchange_protocol.rst +++ b/docs/source/python/interchange_protocol.rst @@ -37,7 +37,7 @@ libraries in the Python ecosystem. See more about the standard in the `protocol documentation `_. -From pyarrow to other libraries: ``__dataframe__()`` method +From PyArrow to other libraries: ``__dataframe__()`` method ----------------------------------------------------------- The ``__dataframe__()`` method creates a new exchange object that @@ -54,7 +54,7 @@ This is meant to be used by the consumer library when calling the ``from_dataframe()`` function and is not meant to be used manually by the user. -From other libraries to pyarrow: ``from_dataframe()`` +From other libraries to PyArrow: ``from_dataframe()`` ----------------------------------------------------- With the ``from_dataframe()`` function, we can construct a :class:`pyarrow.Table` @@ -63,7 +63,7 @@ from any dataframe object that implements the protocol. We can for example take a pandas dataframe and construct a -pyarrow table with the use of the interchange protocol: +PyArrow table with the use of the interchange protocol: .. code-block:: diff --git a/python/pyarrow/_dlpack.pxi b/python/pyarrow/_dlpack.pxi new file mode 100644 index 0000000000000..c2f4cff640691 --- /dev/null +++ b/python/pyarrow/_dlpack.pxi @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
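Editor's note: the restrictions called out in the documentation above (no validity buffers, primitive integer and floating-point types only) surface as TypeError results from the C++ export function. A small sketch of exercising those error paths (CheckUnsupportedInputs is hypothetical; ArrayFromJSON is the testing helper already used by dlpack_test.cc in this patch):

#include <cassert>

#include "arrow/array.h"
#include "arrow/c/dlpack.h"
#include "arrow/testing/gtest_util.h"  // provides arrow::ArrayFromJSON

// Hypothetical check: both exports should fail with TypeError, matching the
// "no missing data" and "integer/float only" rules described in the docs.
void CheckUnsupportedInputs() {
  auto with_nulls = arrow::ArrayFromJSON(arrow::int8(), "[1, null, 3]");
  auto strings = arrow::ArrayFromJSON(arrow::utf8(), R"(["a", "b"])");
  assert(arrow::dlpack::ExportArray(with_nulls).status().IsTypeError());
  assert(arrow::dlpack::ExportArray(strings).status().IsTypeError());
}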
+ +cimport cpython +from cpython.pycapsule cimport PyCapsule_New + + +cdef void dlpack_pycapsule_deleter(object dltensor) noexcept: + cdef DLManagedTensor* dlm_tensor + cdef PyObject* err_type + cdef PyObject* err_value + cdef PyObject* err_traceback + + # Do nothing if the capsule has been consumed + if cpython.PyCapsule_IsValid(dltensor, "used_dltensor"): + return + + # An exception may be in-flight, we must save it in case + # we create another one + cpython.PyErr_Fetch(&err_type, &err_value, &err_traceback) + + dlm_tensor = cpython.PyCapsule_GetPointer(dltensor, 'dltensor') + if dlm_tensor == NULL: + cpython.PyErr_WriteUnraisable(dltensor) + # The deleter can be NULL if there is no way for the caller + # to provide a reasonable destructor + elif dlm_tensor.deleter: + dlm_tensor.deleter(dlm_tensor) + assert (not cpython.PyErr_Occurred()) + + # Set the error indicator from err_type, err_value, err_traceback + cpython.PyErr_Restore(err_type, err_value, err_traceback) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 789e30d3e9b00..74a196002bfa6 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1779,6 +1779,44 @@ cdef class Array(_PandasConvertible): return pyarrow_wrap_array(array) + def __dlpack__(self, stream=None): + """Export a primitive array as a DLPack capsule. + + Parameters + ---------- + stream : int, optional + A Python integer representing a pointer to a stream. Currently not supported. + Stream is provided by the consumer to the producer to instruct the producer + to ensure that operations can safely be performed on the array. + + Returns + ------- + capsule : PyCapsule + A DLPack capsule for the array, pointing to a DLManagedTensor. + """ + if stream is None: + dlm_tensor = GetResultValue(ExportToDLPack(self.sp_array)) + + return PyCapsule_New(dlm_tensor, 'dltensor', dlpack_pycapsule_deleter) + else: + raise NotImplementedError( + "Only stream=None is supported." + ) + + def __dlpack_device__(self): + """ + Return the DLPack device tuple this arrays resides on. + + Returns + ------- + tuple : Tuple[int, int] + Tuple with index specifying the type of the device (where + CPU = 1, see cpp/src/arrow/c/dpack_abi.h) and index of the + device which is 0 by default for CPU. 
+ """ + device = GetResultValue(ExportDevice(self.sp_array)) + return device.device_type, device.device_id + cdef _array_like_to_pandas(obj, options, types_mapper): cdef: diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 403846a38f3fd..bad5ec606c268 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1199,6 +1199,25 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: shared_ptr[CScalar] MakeNullScalar(shared_ptr[CDataType] type) +cdef extern from "arrow/c/dlpack_abi.h" nogil: + ctypedef enum DLDeviceType: + kDLCPU = 1 + + ctypedef struct DLDevice: + DLDeviceType device_type + int32_t device_id + + ctypedef struct DLManagedTensor: + void (*deleter)(DLManagedTensor*) + + +cdef extern from "arrow/c/dlpack.h" namespace "arrow::dlpack" nogil: + CResult[DLManagedTensor*] ExportToDLPack" arrow::dlpack::ExportArray"( + const shared_ptr[CArray]& arr) + + CResult[DLDevice] ExportDevice(const shared_ptr[CArray]& arr) + + cdef extern from "arrow/builder.h" namespace "arrow" nogil: cdef cppclass CArrayBuilder" arrow::ArrayBuilder": diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx index 57fb0f42e38bf..29a0bed55949c 100644 --- a/python/pyarrow/lib.pyx +++ b/python/pyarrow/lib.pyx @@ -176,6 +176,9 @@ include "table.pxi" # Tensors include "tensor.pxi" +# DLPack +include "_dlpack.pxi" + # File IO include "io.pxi" diff --git a/python/pyarrow/tests/test_dlpack.py b/python/pyarrow/tests/test_dlpack.py new file mode 100644 index 0000000000000..7cf3f4acdbd40 --- /dev/null +++ b/python/pyarrow/tests/test_dlpack.py @@ -0,0 +1,142 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
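Editor's note: the (device_type, device_id) tuple documented for __dlpack_device__ above corresponds one-to-one with the DLDevice returned by the C++ ExportDevice call declared in libarrow.pxd. A rough C++ equivalent (GetDLPackDevice is a hypothetical helper, not part of the patch):

#include <memory>
#include <utility>

#include "arrow/array.h"
#include "arrow/c/dlpack.h"
#include "arrow/c/dlpack_abi.h"
#include "arrow/result.h"

// Hypothetical helper returning the same pair that Array.__dlpack_device__
// exposes in Python; CPU-backed arrays yield (kDLCPU = 1, 0).
arrow::Result<std::pair<int, int>> GetDLPackDevice(
    const std::shared_ptr<arrow::Array>& array) {
  ARROW_ASSIGN_OR_RAISE(DLDevice device, arrow::dlpack::ExportDevice(array));
  return std::pair<int, int>{static_cast<int>(device.device_type),
                             static_cast<int>(device.device_id)};
}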
+ +import ctypes +from functools import wraps +import pytest + +import numpy as np + +import pyarrow as pa +from pyarrow.vendored.version import Version + + +def PyCapsule_IsValid(capsule, name): + return ctypes.pythonapi.PyCapsule_IsValid(ctypes.py_object(capsule), name) == 1 + + +def check_dlpack_export(arr, expected_arr): + DLTensor = arr.__dlpack__() + assert PyCapsule_IsValid(DLTensor, b"dltensor") is True + + result = np.from_dlpack(arr) + np.testing.assert_array_equal(result, expected_arr, strict=True) + + assert arr.__dlpack_device__() == (1, 0) + + +def check_bytes_allocated(f): + @wraps(f) + def wrapper(*args, **kwargs): + allocated_bytes = pa.total_allocated_bytes() + try: + return f(*args, **kwargs) + finally: + assert pa.total_allocated_bytes() == allocated_bytes + return wrapper + + +@check_bytes_allocated +@pytest.mark.parametrize( + ('value_type', 'np_type'), + [ + (pa.uint8(), np.uint8), + (pa.uint16(), np.uint16), + (pa.uint32(), np.uint32), + (pa.uint64(), np.uint64), + (pa.int8(), np.int8), + (pa.int16(), np.int16), + (pa.int32(), np.int32), + (pa.int64(), np.int64), + (pa.float16(), np.float16), + (pa.float32(), np.float32), + (pa.float64(), np.float64), + ] +) +def test_dlpack(value_type, np_type): + if Version(np.__version__) < Version("1.24.0"): + pytest.skip("No dlpack support in numpy versions older than 1.22.0, " + "strict keyword in assert_array_equal added in numpy version " + "1.24.0") + + expected = np.array([1, 2, 3], dtype=np_type) + arr = pa.array(expected, type=value_type) + check_dlpack_export(arr, expected) + + arr_sliced = arr.slice(1, 1) + expected = np.array([2], dtype=np_type) + check_dlpack_export(arr_sliced, expected) + + arr_sliced = arr.slice(0, 1) + expected = np.array([1], dtype=np_type) + check_dlpack_export(arr_sliced, expected) + + arr_sliced = arr.slice(1) + expected = np.array([2, 3], dtype=np_type) + check_dlpack_export(arr_sliced, expected) + + arr_zero = pa.array([], type=value_type) + expected = np.array([], dtype=np_type) + check_dlpack_export(arr_zero, expected) + + +def test_dlpack_not_supported(): + if Version(np.__version__) < Version("1.22.0"): + pytest.skip("No dlpack support in numpy versions older than 1.22.0.") + + arr = pa.array([1, None, 3]) + with pytest.raises(TypeError, match="Can only use DLPack " + "on arrays with no nulls."): + np.from_dlpack(arr) + + arr = pa.array( + [[0, 1], [3, 4]], + type=pa.list_(pa.int32()) + ) + with pytest.raises(TypeError, match="DataType is not compatible with DLPack spec"): + np.from_dlpack(arr) + + arr = pa.array([]) + with pytest.raises(TypeError, match="DataType is not compatible with DLPack spec"): + np.from_dlpack(arr) + + # DLPack doesn't support bit-packed boolean values + arr = pa.array([True, False, True]) + with pytest.raises(TypeError, match="Bit-packed boolean data type " + "not supported by DLPack."): + np.from_dlpack(arr) + + +def test_dlpack_cuda_not_supported(): + cuda = pytest.importorskip("pyarrow.cuda") + + schema = pa.schema([pa.field('f0', pa.int16())]) + a0 = pa.array([1, 2, 3], type=pa.int16()) + batch = pa.record_batch([a0], schema=schema) + + cbuf = cuda.serialize_record_batch(batch, cuda.Context(0)) + cbatch = cuda.read_record_batch(cbuf, batch.schema) + carr = cbatch["f0"] + + # CudaBuffers not yet supported + with pytest.raises(NotImplementedError, match="DLPack support is implemented " + "only for buffers on CPU device."): + np.from_dlpack(carr) + + with pytest.raises(NotImplementedError, match="DLPack support is implemented " + "only for buffers on CPU 
device."): + carr.__dlpack_device__() From 1c48d69844cb00918be9255f60d7eb0f59792a8b Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Wed, 20 Dec 2023 00:58:38 +0100 Subject: [PATCH 074/570] MINOR: [R] Update NEWS.md for 14.0.2 (#39286) Update NEWS.md with recent changes Authored-by: Jacob Wujciak-Jens Signed-off-by: Jacob Wujciak-Jens --- r/NEWS.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/r/NEWS.md b/r/NEWS.md index 63f12607d8d1b..ca062b0390a9f 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -21,6 +21,13 @@ # arrow 14.0.2 +## Minor improvements and fixes + +* Fixed C++ compiler warnings caused by implicit conversions (#39138, #39186). +* Fixed confusing dplyr warnings during tests (#39076). +* Added missing "-framework Security" pkg-config flag to prevent + issues when compiling with strict linker settings (#38861). + # arrow 14.0.0.2 ## Minor improvements and fixes From cc9e649d0382c70552e6e556199a3e238dbb7576 Mon Sep 17 00:00:00 2001 From: Judah Rand <17158624+judahrand@users.noreply.github.com> Date: Wed, 20 Dec 2023 09:34:23 +0000 Subject: [PATCH 075/570] GH-35331: [Python] Expose Parquet sorting metadata (#37665) ### Rationale for this change Picking up where #35453 left off. Closes https://github.com/apache/arrow/issues/35331 This PR builds on top of #37469 ### What changes are included in this PR? ### Are these changes tested? ### Are there any user-facing changes? * Closes: #35331 Lead-authored-by: Judah Rand <17158624+judahrand@users.noreply.github.com> Co-authored-by: Will Jones Signed-off-by: AlenkaF --- docs/source/python/api/formats.rst | 1 + python/pyarrow/_dataset_parquet.pyx | 2 + python/pyarrow/_parquet.pxd | 24 +- python/pyarrow/_parquet.pyx | 284 +++++++++++++++++- python/pyarrow/parquet/core.py | 12 +- python/pyarrow/tests/parquet/test_metadata.py | 84 ++++++ 6 files changed, 394 insertions(+), 13 deletions(-) diff --git a/docs/source/python/api/formats.rst b/docs/source/python/api/formats.rst index 9ca499c0972e5..86e2585ac2537 100644 --- a/docs/source/python/api/formats.rst +++ b/docs/source/python/api/formats.rst @@ -97,6 +97,7 @@ Parquet Metadata FileMetaData RowGroupMetaData + SortingColumn ColumnChunkMetaData Statistics ParquetSchema diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index 61e051f56cfb0..58ef6145cf7d1 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -609,6 +609,7 @@ cdef class ParquetFileWriteOptions(FileWriteOptions): dictionary_pagesize_limit=self._properties["dictionary_pagesize_limit"], write_page_index=self._properties["write_page_index"], write_page_checksum=self._properties["write_page_checksum"], + sorting_columns=self._properties["sorting_columns"], ) def _set_arrow_properties(self): @@ -659,6 +660,7 @@ cdef class ParquetFileWriteOptions(FileWriteOptions): write_page_index=False, encryption_config=None, write_page_checksum=False, + sorting_columns=None, ) self._set_properties() diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd index 7ce747e0aa46d..ae4094d8b4b5f 100644 --- a/python/pyarrow/_parquet.pxd +++ b/python/pyarrow/_parquet.pxd @@ -328,11 +328,17 @@ cdef extern from "parquet/api/reader.h" namespace "parquet" nogil: optional[ParquetIndexLocation] GetColumnIndexLocation() const optional[ParquetIndexLocation] GetOffsetIndexLocation() const + struct CSortingColumn" parquet::SortingColumn": + int column_idx + c_bool descending + c_bool nulls_first + cdef cppclass CRowGroupMetaData" parquet::RowGroupMetaData": c_bool 
Equals(const CRowGroupMetaData&) const - int num_columns() - int64_t num_rows() - int64_t total_byte_size() + int num_columns() const + int64_t num_rows() const + int64_t total_byte_size() const + vector[CSortingColumn] sorting_columns() const unique_ptr[CColumnChunkMetaData] ColumnChunk(int i) const cdef cppclass CFileMetaData" parquet::FileMetaData": @@ -421,6 +427,7 @@ cdef extern from "parquet/api/writer.h" namespace "parquet" nogil: Builder* disable_dictionary() Builder* enable_dictionary() Builder* enable_dictionary(const c_string& path) + Builder* set_sorting_columns(vector[CSortingColumn] sorting_columns) Builder* disable_statistics() Builder* enable_statistics() Builder* enable_statistics(const c_string& path) @@ -517,8 +524,8 @@ cdef extern from "parquet/arrow/schema.h" namespace "parquet::arrow" nogil: CStatus ToParquetSchema( const CSchema* arrow_schema, - const ArrowReaderProperties& properties, - const shared_ptr[const CKeyValueMetadata]& key_value_metadata, + const WriterProperties& properties, + const ArrowWriterProperties& arrow_properties, shared_ptr[SchemaDescriptor]* out) @@ -584,7 +591,9 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( write_batch_size=*, dictionary_pagesize_limit=*, write_page_index=*, - write_page_checksum=*) except * + write_page_checksum=*, + sorting_columns=*, +) except * cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties( @@ -593,7 +602,8 @@ cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties( allow_truncated_timestamps=*, writer_engine_version=*, use_compliant_nested_type=*, - store_schema=*) except * + store_schema=*, +) except * cdef class ParquetSchema(_Weakrefable): cdef: diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index 35344eb735516..0b685245655a2 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -18,6 +18,7 @@ # cython: profile=False # distutils: language = c++ +from collections.abc import Sequence from textwrap import indent import warnings @@ -31,6 +32,7 @@ from pyarrow.lib cimport (_Weakrefable, Buffer, Schema, Table, NativeFile, pyarrow_wrap_chunked_array, pyarrow_wrap_schema, + pyarrow_unwrap_schema, pyarrow_wrap_table, pyarrow_wrap_batch, pyarrow_wrap_scalar, @@ -506,6 +508,204 @@ cdef class ColumnChunkMetaData(_Weakrefable): return self.metadata.GetColumnIndexLocation().has_value() +cdef class SortingColumn: + """ + Sorting specification for a single column. + + Returned by :meth:`RowGroupMetaData.sorting_columns` and used in + :class:`ParquetWriter` to specify the sort order of the data. + + Parameters + ---------- + column_index : int + Index of column that data is sorted by. + descending : bool, default False + Whether column is sorted in descending order. + nulls_first : bool, default False + Whether null values appear before valid values. + + Notes + ----- + + Column indices are zero-based, refer only to leaf fields, and are in + depth-first order. This may make the column indices for nested schemas + different from what you expect. In most cases, it will be easier to + specify the sort order using column names instead of column indices + and converting using the ``from_ordering`` method. 
+ + Examples + -------- + + In other APIs, sort order is specified by names, such as: + + >>> sort_order = [('id', 'ascending'), ('timestamp', 'descending')] + + For Parquet, the column index must be used instead: + + >>> import pyarrow.parquet as pq + >>> [pq.SortingColumn(0), pq.SortingColumn(1, descending=True)] + [SortingColumn(column_index=0, descending=False, nulls_first=False), SortingColumn(column_index=1, descending=True, nulls_first=False)] + + Convert the sort_order into the list of sorting columns with + ``from_ordering`` (note that the schema must be provided as well): + + >>> import pyarrow as pa + >>> schema = pa.schema([('id', pa.int64()), ('timestamp', pa.timestamp('ms'))]) + >>> sorting_columns = pq.SortingColumn.from_ordering(schema, sort_order) + >>> sorting_columns + (SortingColumn(column_index=0, descending=False, nulls_first=False), SortingColumn(column_index=1, descending=True, nulls_first=False)) + + Convert back to the sort order with ``to_ordering``: + + >>> pq.SortingColumn.to_ordering(schema, sorting_columns) + ((('id', 'ascending'), ('timestamp', 'descending')), 'at_end') + + See Also + -------- + RowGroupMetaData.sorting_columns + """ + cdef int column_index + cdef c_bool descending + cdef c_bool nulls_first + + def __init__(self, int column_index, c_bool descending=False, c_bool nulls_first=False): + self.column_index = column_index + self.descending = descending + self.nulls_first = nulls_first + + @classmethod + def from_ordering(cls, Schema schema, sort_keys, null_placement='at_end'): + """ + Create a tuple of SortingColumn objects from the same arguments as + :class:`pyarrow.compute.SortOptions`. + + Parameters + ---------- + schema : Schema + Schema of the input data. + sort_keys : Sequence of (name, order) tuples + Names of field/column keys (str) to sort the input on, + along with the order each field/column is sorted in. + Accepted values for `order` are "ascending", "descending". + null_placement : {'at_start', 'at_end'}, default 'at_end' + Where null values should appear in the sort order. + + Returns + ------- + sorting_columns : tuple of SortingColumn + """ + if null_placement == 'at_start': + nulls_first = True + elif null_placement == 'at_end': + nulls_first = False + else: + raise ValueError('null_placement must be "at_start" or "at_end"') + + col_map = _name_to_index_map(schema) + + sorting_columns = [] + + for sort_key in sort_keys: + if isinstance(sort_key, str): + name = sort_key + descending = False + elif (isinstance(sort_key, tuple) and len(sort_key) == 2 and + isinstance(sort_key[0], str) and + isinstance(sort_key[1], str)): + name, descending = sort_key + if descending == "descending": + descending = True + elif descending == "ascending": + descending = False + else: + raise ValueError("Invalid sort key direction: {0}" + .format(descending)) + else: + raise ValueError("Invalid sort key: {0}".format(sort_key)) + + try: + column_index = col_map[name] + except KeyError: + raise ValueError("Sort key name '{0}' not found in schema:\n{1}" + .format(name, schema)) + + sorting_columns.append( + cls(column_index, descending=descending, nulls_first=nulls_first) + ) + + return tuple(sorting_columns) + + @staticmethod + def to_ordering(Schema schema, sorting_columns): + """ + Convert a tuple of SortingColumn objects to the same format as + :class:`pyarrow.compute.SortOptions`. + + Parameters + ---------- + schema : Schema + Schema of the input data. + sorting_columns : tuple of SortingColumn + Columns to sort the input on. 
+ + Returns + ------- + sort_keys : tuple of (name, order) tuples + null_placement : {'at_start', 'at_end'} + """ + col_map = {i: name for name, i in _name_to_index_map(schema).items()} + + sort_keys = [] + nulls_first = None + + for sorting_column in sorting_columns: + name = col_map[sorting_column.column_index] + if sorting_column.descending: + order = "descending" + else: + order = "ascending" + sort_keys.append((name, order)) + if nulls_first is None: + nulls_first = sorting_column.nulls_first + elif nulls_first != sorting_column.nulls_first: + raise ValueError("Sorting columns have inconsistent null placement") + + if nulls_first: + null_placement = "at_start" + else: + null_placement = "at_end" + + return tuple(sort_keys), null_placement + + def __repr__(self): + return """{}(column_index={}, descending={}, nulls_first={})""".format( + self.__class__.__name__, + self.column_index, self.descending, self.nulls_first) + + def __eq__(self, SortingColumn other): + return (self.column_index == other.column_index and + self.descending == other.descending and + self.nulls_first == other.nulls_first) + + def __hash__(self): + return hash((self.column_index, self.descending, self.nulls_first)) + + @property + def column_index(self): + """"Index of column data is sorted by (int).""" + return self.column_index + + @property + def descending(self): + """Whether column is sorted in descending order (bool).""" + return self.descending + + @property + def nulls_first(self): + """Whether null values appear before valid values (bool).""" + return self.nulls_first + + cdef class RowGroupMetaData(_Weakrefable): """Metadata for a single row group.""" @@ -565,10 +765,12 @@ cdef class RowGroupMetaData(_Weakrefable): return """{0} num_columns: {1} num_rows: {2} - total_byte_size: {3}""".format(object.__repr__(self), + total_byte_size: {3} + sorting_columns: {4}""".format(object.__repr__(self), self.num_columns, self.num_rows, - self.total_byte_size) + self.total_byte_size, + self.sorting_columns) def to_dict(self): """ @@ -585,6 +787,7 @@ cdef class RowGroupMetaData(_Weakrefable): num_rows=self.num_rows, total_byte_size=self.total_byte_size, columns=columns, + sorting_columns=[col.to_dict() for col in self.sorting_columns] ) for i in range(self.num_columns): columns.append(self.column(i).to_dict()) @@ -605,6 +808,19 @@ cdef class RowGroupMetaData(_Weakrefable): """Total byte size of all the uncompressed column data in this row group (int).""" return self.metadata.total_byte_size() + @property + def sorting_columns(self): + """Columns the row group is sorted by (tuple of :class:`SortingColumn`)).""" + out = [] + cdef vector[CSortingColumn] sorting_columns = self.metadata.sorting_columns() + for sorting_col in sorting_columns: + out.append(SortingColumn( + sorting_col.column_idx, + sorting_col.descending, + sorting_col.nulls_first + )) + return tuple(out) + def _reconstruct_filemetadata(Buffer serialized): cdef: @@ -1550,6 +1766,28 @@ cdef class ParquetReader(_Weakrefable): return closed +cdef CSortingColumn _convert_sorting_column(SortingColumn sorting_column): + cdef CSortingColumn c_sorting_column + + c_sorting_column.column_idx = sorting_column.column_index + c_sorting_column.descending = sorting_column.descending + c_sorting_column.nulls_first = sorting_column.nulls_first + + return c_sorting_column + + +cdef vector[CSortingColumn] _convert_sorting_columns(sorting_columns) except *: + if not (isinstance(sorting_columns, Sequence) + and all(isinstance(col, SortingColumn) for col in sorting_columns)): + 
raise ValueError( + "'sorting_columns' must be a list of `SortingColumn`") + + cdef vector[CSortingColumn] c_sorting_columns = [_convert_sorting_column(col) + for col in sorting_columns] + + return c_sorting_columns + + cdef shared_ptr[WriterProperties] _create_writer_properties( use_dictionary=None, compression=None, @@ -1564,7 +1802,8 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( write_batch_size=None, dictionary_pagesize_limit=None, write_page_index=False, - write_page_checksum=False) except *: + write_page_checksum=False, + sorting_columns=None) except *: """General writer properties""" cdef: shared_ptr[WriterProperties] properties @@ -1649,6 +1888,11 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( for column in write_statistics: props.enable_statistics(tobytes(column)) + # sorting_columns + + if sorting_columns is not None: + props.set_sorting_columns(_convert_sorting_columns(sorting_columns)) + # use_byte_stream_split if isinstance(use_byte_stream_split, bool): @@ -1788,6 +2032,34 @@ cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties( return arrow_properties +cdef _name_to_index_map(Schema arrow_schema): + cdef: + shared_ptr[CSchema] sp_arrow_schema + shared_ptr[SchemaDescriptor] sp_parquet_schema + shared_ptr[WriterProperties] props = _create_writer_properties() + shared_ptr[ArrowWriterProperties] arrow_props = _create_arrow_writer_properties( + use_deprecated_int96_timestamps=False, + coerce_timestamps=None, + allow_truncated_timestamps=False, + writer_engine_version="V2" + ) + + sp_arrow_schema = pyarrow_unwrap_schema(arrow_schema) + + with nogil: + check_status(ToParquetSchema( + sp_arrow_schema.get(), deref(props.get()), deref(arrow_props.get()), &sp_parquet_schema)) + + out = dict() + + cdef SchemaDescriptor* parquet_schema = sp_parquet_schema.get() + + for i in range(parquet_schema.num_columns()): + name = frombytes(parquet_schema.Column(i).path().get().ToDotString()) + out[name] = i + + return out + cdef class ParquetWriter(_Weakrefable): cdef: @@ -1835,7 +2107,8 @@ cdef class ParquetWriter(_Weakrefable): dictionary_pagesize_limit=None, store_schema=True, write_page_index=False, - write_page_checksum=False): + write_page_checksum=False, + sorting_columns=None): cdef: shared_ptr[WriterProperties] properties shared_ptr[ArrowWriterProperties] arrow_properties @@ -1867,7 +2140,8 @@ cdef class ParquetWriter(_Weakrefable): write_batch_size=write_batch_size, dictionary_pagesize_limit=dictionary_pagesize_limit, write_page_index=write_page_index, - write_page_checksum=write_page_checksum + write_page_checksum=write_page_checksum, + sorting_columns=sorting_columns, ) arrow_properties = _create_arrow_writer_properties( use_deprecated_int96_timestamps=use_deprecated_int96_timestamps, diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index db22eb3293c86..852b339211b0d 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -48,7 +48,8 @@ ParquetSchema, ColumnSchema, ParquetLogicalType, FileEncryptionProperties, - FileDecryptionProperties) + FileDecryptionProperties, + SortingColumn) from pyarrow.fs import (LocalFileSystem, FileSystem, FileType, _resolve_filesystem_and_path, _ensure_filesystem) from pyarrow import filesystem as legacyfs @@ -895,6 +896,10 @@ def _sanitize_table(table, new_schema, flavor): Whether to write page checksums in general for all columns. Page checksums enable detection of data corruption, which might occur during transmission or in the storage. 
+sorting_columns : Sequence of SortingColumn, default None + Specify the sort order of the data being written. The writer does not sort + the data nor does it verify that the data is sorted. The sort order is + written to the row group metadata, which can then be used by readers. """ _parquet_writer_example_doc = """\ @@ -989,6 +994,7 @@ def __init__(self, where, schema, filesystem=None, store_schema=True, write_page_index=False, write_page_checksum=False, + sorting_columns=None, **options): if use_deprecated_int96_timestamps is None: # Use int96 timestamps for Spark @@ -1047,6 +1053,7 @@ def __init__(self, where, schema, filesystem=None, store_schema=store_schema, write_page_index=write_page_index, write_page_checksum=write_page_checksum, + sorting_columns=sorting_columns, **options) self.is_open = True @@ -3129,6 +3136,7 @@ def write_table(table, where, row_group_size=None, version='2.6', store_schema=True, write_page_index=False, write_page_checksum=False, + sorting_columns=None, **kwargs): # Implementor's note: when adding keywords here / updating defaults, also # update it in write_to_dataset and _dataset_parquet.pyx ParquetFileWriteOptions @@ -3158,6 +3166,7 @@ def write_table(table, where, row_group_size=None, version='2.6', store_schema=store_schema, write_page_index=write_page_index, write_page_checksum=write_page_checksum, + sorting_columns=sorting_columns, **kwargs) as writer: writer.write_table(table, row_group_size=row_group_size) except Exception: @@ -3742,6 +3751,7 @@ def read_schema(where, memory_map=False, decryption_properties=None, "ParquetWriter", "PartitionSet", "RowGroupMetaData", + "SortingColumn", "Statistics", "read_metadata", "read_pandas", diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py index 3efaf1dbf5526..73284d2e53b9e 100644 --- a/python/pyarrow/tests/parquet/test_metadata.py +++ b/python/pyarrow/tests/parquet/test_metadata.py @@ -301,6 +301,90 @@ def test_parquet_write_disable_statistics(tempdir): assert cc_b.statistics is None +def test_parquet_sorting_column(): + sorting_col = pq.SortingColumn(10) + assert sorting_col.column_index == 10 + assert sorting_col.descending is False + assert sorting_col.nulls_first is False + + sorting_col = pq.SortingColumn(0, descending=True, nulls_first=True) + assert sorting_col.column_index == 0 + assert sorting_col.descending is True + assert sorting_col.nulls_first is True + + schema = pa.schema([('a', pa.int64()), ('b', pa.int64())]) + sorting_cols = ( + pq.SortingColumn(1, descending=True), + pq.SortingColumn(0, descending=False), + ) + sort_order, null_placement = pq.SortingColumn.to_ordering(schema, sorting_cols) + assert sort_order == (('b', "descending"), ('a', "ascending")) + assert null_placement == "at_end" + + sorting_cols_roundtripped = pq.SortingColumn.from_ordering( + schema, sort_order, null_placement) + assert sorting_cols_roundtripped == sorting_cols + + sorting_cols = pq.SortingColumn.from_ordering( + schema, ('a', ('b', "descending")), null_placement="at_start") + expected = ( + pq.SortingColumn(0, descending=False, nulls_first=True), + pq.SortingColumn(1, descending=True, nulls_first=True), + ) + assert sorting_cols == expected + + # Conversions handle empty tuples + empty_sorting_cols = pq.SortingColumn.from_ordering(schema, ()) + assert empty_sorting_cols == () + + assert pq.SortingColumn.to_ordering(schema, ()) == ((), "at_end") + + with pytest.raises(ValueError): + pq.SortingColumn.from_ordering(schema, (("a", "not a valid sort order"))) + + 
with pytest.raises(ValueError, match="inconsistent null placement"): + sorting_cols = ( + pq.SortingColumn(1, nulls_first=True), + pq.SortingColumn(0, nulls_first=False), + ) + pq.SortingColumn.to_ordering(schema, sorting_cols) + + +def test_parquet_sorting_column_nested(): + schema = pa.schema({ + 'a': pa.struct([('x', pa.int64()), ('y', pa.int64())]), + 'b': pa.int64() + }) + + sorting_columns = [ + pq.SortingColumn(0, descending=True), # a.x + pq.SortingColumn(2, descending=False) # b + ] + + sort_order, null_placement = pq.SortingColumn.to_ordering(schema, sorting_columns) + assert null_placement == "at_end" + assert len(sort_order) == 2 + assert sort_order[0] == ("a.x", "descending") + assert sort_order[1] == ("b", "ascending") + + +def test_parquet_file_sorting_columns(): + table = pa.table({'a': [1, 2, 3], 'b': ['a', 'b', 'c']}) + + sorting_columns = ( + pq.SortingColumn(column_index=0, descending=True, nulls_first=True), + pq.SortingColumn(column_index=1, descending=False), + ) + writer = pa.BufferOutputStream() + _write_table(table, writer, sorting_columns=sorting_columns) + reader = pa.BufferReader(writer.getvalue()) + + # Can retrieve sorting columns from metadata + metadata = pq.read_metadata(reader) + assert metadata.num_row_groups == 1 + assert sorting_columns == metadata.row_group(0).sorting_columns + + def test_field_id_metadata(): # ARROW-7080 field_id = b'PARQUET:field_id' From b1fcba1b395e0aedddcdab19958c14809d780d4c Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Wed, 20 Dec 2023 11:06:57 +0100 Subject: [PATCH 076/570] MINOR: [Docs] local_timestamp kernel docs are not linked in python docs (#39274) ### Rationale for this change local_timestamp kernel docs are linked in [cpp](https://arrow.apache.org/docs/cpp/compute.html#timezone-handling) but not in [python docs](https://arrow.apache.org/docs/python/api/compute.html#timezone-handling). ### What changes are included in this PR? This adds a rst link in python docs ### Are these changes tested? No ### Are there any user-facing changes? Change will be visible in the docs Authored-by: Rok Mihevc Signed-off-by: Joris Van den Bossche --- docs/source/python/api/compute.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index 4ee364fcf636b..b879643017a90 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -468,6 +468,7 @@ Timezone Handling :toctree: ../generated/ assume_timezone + local_timestamp Associative Transforms ---------------------- From 87865b5a85c722ef7578aed4300e9d0b219c909c Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 20 Dec 2023 12:07:06 +0100 Subject: [PATCH 077/570] GH-39306: [C++][Benchmarking] Remove hardcoded min times (#39307) ### Rationale for this change `MinTime` settings hardcoded in the C++ source code prevent the `--benchmark_min_time` CLI option from working. ### Are these changes tested? No. ### Are there any user-facing changes? No. 
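Editor's note: with the hardcoded MinTime(1.0) calls removed, the minimum measurement time is chosen when running the benchmark binary via google-benchmark's --benchmark_min_time flag rather than in source. A self-contained sketch of the resulting registration pattern (BM_Example and the flag syntax in the comment are illustrative; the accepted value format depends on the google-benchmark version in use):

#include <benchmark/benchmark.h>

// Hypothetical benchmark registered without a hardcoded MinTime(). The time
// floor is now set at run time, e.g.
//   ./some-benchmark-binary --benchmark_min_time=1x
// (or a seconds value, depending on the google-benchmark version).
static void BM_Example(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize(42 * 42);
  }
}
BENCHMARK(BM_Example)->Unit(benchmark::kMicrosecond);
BENCHMARK_MAIN();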
* Closes: #39306 Authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- .../kernels/vector_partition_benchmark.cc | 1 - .../compute/kernels/vector_topk_benchmark.cc | 1 - cpp/src/gandiva/tests/micro_benchmarks.cc | 38 +++++++++---------- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_partition_benchmark.cc b/cpp/src/arrow/compute/kernels/vector_partition_benchmark.cc index ff009c65543a6..f21dd8317e493 100644 --- a/cpp/src/arrow/compute/kernels/vector_partition_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/vector_partition_benchmark.cc @@ -52,7 +52,6 @@ BENCHMARK(NthToIndicesInt64) ->Apply(RegressionSetArgs) ->Args({1 << 20, 100}) ->Args({1 << 23, 100}) - ->MinTime(1.0) ->Unit(benchmark::TimeUnit::kNanosecond); } // namespace compute diff --git a/cpp/src/arrow/compute/kernels/vector_topk_benchmark.cc b/cpp/src/arrow/compute/kernels/vector_topk_benchmark.cc index 3f89eb6bea9cd..e95e7a6f02a04 100644 --- a/cpp/src/arrow/compute/kernels/vector_topk_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/vector_topk_benchmark.cc @@ -52,7 +52,6 @@ BENCHMARK(SelectKInt64) ->Apply(RegressionSetArgs) ->Args({1 << 20, 100}) ->Args({1 << 23, 100}) - ->MinTime(1.0) ->Unit(benchmark::TimeUnit::kNanosecond); } // namespace compute diff --git a/cpp/src/gandiva/tests/micro_benchmarks.cc b/cpp/src/gandiva/tests/micro_benchmarks.cc index ed77f8ae5045b..f126b769b2010 100644 --- a/cpp/src/gandiva/tests/micro_benchmarks.cc +++ b/cpp/src/gandiva/tests/micro_benchmarks.cc @@ -460,24 +460,24 @@ static void DecimalAdd3Large(benchmark::State& state) { DoDecimalAdd3(state, DecimalTypeUtil::kMaxPrecision, 18, true); } -BENCHMARK(TimedTestAdd3)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(TimedTestBigNested)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(TimedTestExtractYear)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(TimedTestFilterAdd2)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(TimedTestFilterLike)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(TimedTestCastFloatFromString)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(TimedTestCastIntFromString)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(TimedTestAllocs)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(TimedTestOutputStringAllocs)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(TimedTestMultiOr)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(TimedTestInExpr)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(DecimalAdd2Fast)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(DecimalAdd2LeadingZeroes)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(DecimalAdd2LeadingZeroesWithDiv)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(DecimalAdd2Large)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(DecimalAdd3Fast)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(DecimalAdd3LeadingZeroes)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(DecimalAdd3LeadingZeroesWithDiv)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(DecimalAdd3Large)->MinTime(1.0)->Unit(benchmark::kMicrosecond); +BENCHMARK(TimedTestAdd3)->Unit(benchmark::kMicrosecond); +BENCHMARK(TimedTestBigNested)->Unit(benchmark::kMicrosecond); +BENCHMARK(TimedTestExtractYear)->Unit(benchmark::kMicrosecond); +BENCHMARK(TimedTestFilterAdd2)->Unit(benchmark::kMicrosecond); +BENCHMARK(TimedTestFilterLike)->Unit(benchmark::kMicrosecond); 
+BENCHMARK(TimedTestCastFloatFromString)->Unit(benchmark::kMicrosecond); +BENCHMARK(TimedTestCastIntFromString)->Unit(benchmark::kMicrosecond); +BENCHMARK(TimedTestAllocs)->Unit(benchmark::kMicrosecond); +BENCHMARK(TimedTestOutputStringAllocs)->Unit(benchmark::kMicrosecond); +BENCHMARK(TimedTestMultiOr)->Unit(benchmark::kMicrosecond); +BENCHMARK(TimedTestInExpr)->Unit(benchmark::kMicrosecond); +BENCHMARK(DecimalAdd2Fast)->Unit(benchmark::kMicrosecond); +BENCHMARK(DecimalAdd2LeadingZeroes)->Unit(benchmark::kMicrosecond); +BENCHMARK(DecimalAdd2LeadingZeroesWithDiv)->Unit(benchmark::kMicrosecond); +BENCHMARK(DecimalAdd2Large)->Unit(benchmark::kMicrosecond); +BENCHMARK(DecimalAdd3Fast)->Unit(benchmark::kMicrosecond); +BENCHMARK(DecimalAdd3LeadingZeroes)->Unit(benchmark::kMicrosecond); +BENCHMARK(DecimalAdd3LeadingZeroesWithDiv)->Unit(benchmark::kMicrosecond); +BENCHMARK(DecimalAdd3Large)->Unit(benchmark::kMicrosecond); } // namespace gandiva From 726568936e345ee2a15d3a5ad5d654e14939d673 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 20 Dec 2023 10:42:46 -0300 Subject: [PATCH 078/570] GH-39297: [C++][FS]: Inform caller of container not-existing when checking for HNS support (#39298) ### Rationale for this change An operation checking for Hierarchical Namespace support shouldn't fail completely when the reason for the check failing is the container not existing. We can allow the caller to decide what to do in that situation by returning a result that indicates the check didn't succeed because the container doesn't exist. ### What changes are included in this PR? - Removal of the `azurefs_intern.h/cc` files - Implementation of the check as a free-function instead of a class - Memoization of the result in the `AzureFileSystem` class ### Are these changes tested? Yes. The tests were improved to cover all cases. * Closes: #39297 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/CMakeLists.txt | 4 +- cpp/src/arrow/filesystem/azurefs.cc | 320 ++++++++++++------- cpp/src/arrow/filesystem/azurefs.h | 51 ++- cpp/src/arrow/filesystem/azurefs_internal.cc | 94 ------ cpp/src/arrow/filesystem/azurefs_internal.h | 39 --- cpp/src/arrow/filesystem/azurefs_test.cc | 64 +++- 6 files changed, 309 insertions(+), 263 deletions(-) delete mode 100644 cpp/src/arrow/filesystem/azurefs_internal.cc delete mode 100644 cpp/src/arrow/filesystem/azurefs_internal.h diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 00947c6275678..c1fafeebc035d 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -506,8 +506,8 @@ if(ARROW_FILESYSTEM) filesystem/util_internal.cc) if(ARROW_AZURE) - list(APPEND ARROW_SRCS filesystem/azurefs.cc filesystem/azurefs_internal.cc) - set_source_files_properties(filesystem/azurefs.cc filesystem/azurefs_internal.cc + list(APPEND ARROW_SRCS filesystem/azurefs.cc) + set_source_files_properties(filesystem/azurefs.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON SKIP_UNITY_BUILD_INCLUSION ON) endif() diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 1aa3e86a6f926..032cd034e7abb 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -16,7 +16,6 @@ // under the License. 
#include "arrow/filesystem/azurefs.h" -#include "arrow/filesystem/azurefs_internal.h" #include #include @@ -42,6 +41,8 @@ namespace DataLake = Azure::Storage::Files::DataLake; namespace Http = Azure::Core::Http; namespace Storage = Azure::Storage; +using internal::HNSSupport; + // ----------------------------------------------------------------------- // AzureOptions Implementation @@ -263,9 +264,11 @@ Status StatusFromErrorResponse(const std::string& url, "): ", body_text); } -bool IsContainerNotFound(const Storage::StorageException& exception) { - if (exception.ErrorCode == "ContainerNotFound") { - DCHECK_EQ(exception.StatusCode, Http::HttpStatusCode::NotFound); +bool IsContainerNotFound(const Storage::StorageException& e) { + if (e.ErrorCode == "ContainerNotFound" || + e.ReasonPhrase == "The specified container does not exist." || + e.ReasonPhrase == "The specified filesystem does not exist.") { + DCHECK_EQ(e.StatusCode, Http::HttpStatusCode::NotFound); return true; } return false; @@ -441,8 +444,7 @@ class ObjectInputFile final : public io::RandomAccessFile { } return ExceptionToStatus( "GetProperties failed for '" + blob_client_->GetUrl() + - "' with an unexpected Azure error. Cannot initialise an ObjectInputFile " - "without knowing the file size.", + "'. Cannot initialise an ObjectInputFile without knowing the file size.", exception); } } @@ -520,12 +522,11 @@ class ObjectInputFile final : public io::RandomAccessFile { ->DownloadTo(reinterpret_cast(out), nbytes, download_options) .Value.ContentRange.Length.Value(); } catch (const Storage::StorageException& exception) { - return ExceptionToStatus("DownloadTo from '" + blob_client_->GetUrl() + - "' at position " + std::to_string(position) + " for " + - std::to_string(nbytes) + - " bytes failed with an Azure error. ReadAt " - "failed to read the required byte range.", - exception); + return ExceptionToStatus( + "DownloadTo from '" + blob_client_->GetUrl() + "' at position " + + std::to_string(position) + " for " + std::to_string(nbytes) + + " bytes failed. ReadAt failed to read the required byte range.", + exception); } } @@ -576,9 +577,8 @@ Status CreateEmptyBlockBlob(std::shared_ptr block_blob_c } catch (const Storage::StorageException& exception) { return ExceptionToStatus( "UploadFrom failed for '" + block_blob_client->GetUrl() + - "' with an unexpected Azure error. There is no existing blob at this " - "location or the existing blob must be replaced so ObjectAppendStream must " - "create a new empty block blob.", + "'. There is no existing blob at this location or the existing blob must be " + "replaced so ObjectAppendStream must create a new empty block blob.", exception); } return Status::OK(); @@ -591,8 +591,7 @@ Result GetBlockList( } catch (Storage::StorageException& exception) { return ExceptionToStatus( "GetBlockList failed for '" + block_blob_client->GetUrl() + - "' with an unexpected Azure error. Cannot write to a file without first " - "fetching the existing block list.", + "'. Cannot write to a file without first fetching the existing block list.", exception); } } @@ -620,8 +619,7 @@ Status CommitBlockList(std::shared_ptr block_bl } catch (const Storage::StorageException& exception) { return ExceptionToStatus( "CommitBlockList failed for '" + block_blob_client->GetUrl() + - "' with an unexpected Azure error. Committing is required to flush an " - "output/append stream.", + "'. 
Committing is required to flush an output/append stream.", exception); } return Status::OK(); @@ -665,9 +663,8 @@ class ObjectAppendStream final : public io::OutputStream { } else { return ExceptionToStatus( "GetProperties failed for '" + block_blob_client_->GetUrl() + - "' with an unexpected Azure error. Cannot initialise an " - "ObjectAppendStream without knowing whether a file already exists at " - "this path, and if it exists, its size.", + "'. Cannot initialise an ObjectAppendStream without knowing whether a " + "file already exists at this path, and if it exists, its size.", exception); } content_length_ = 0; @@ -765,8 +762,7 @@ class ObjectAppendStream final : public io::OutputStream { return ExceptionToStatus( "StageBlock failed for '" + block_blob_client_->GetUrl() + "' new_block_id: '" + new_block_id + - "' with an unexpected Azure error. Staging new blocks is fundamental to " - "streaming writes to blob storage.", + "'. Staging new blocks is fundamental to streaming writes to blob storage.", exception); } block_ids_.push_back(new_block_id); @@ -786,11 +782,116 @@ class ObjectAppendStream final : public io::OutputStream { Storage::Metadata metadata_; }; +bool IsDfsEmulator(const AzureOptions& options) { + return options.dfs_storage_authority != ".dfs.core.windows.net"; +} + } // namespace +// ----------------------------------------------------------------------- +// internal implementation + +namespace internal { + +Result CheckIfHierarchicalNamespaceIsEnabled( + DataLake::DataLakeFileSystemClient& adlfs_client, const AzureOptions& options) { + try { + auto directory_client = adlfs_client.GetDirectoryClient(""); + // GetAccessControlList will fail on storage accounts + // without hierarchical namespace enabled. + directory_client.GetAccessControlList(); + return HNSSupport::kEnabled; + } catch (std::out_of_range& exception) { + // Azurite issue detected. + DCHECK(IsDfsEmulator(options)); + return HNSSupport::kDisabled; + } catch (const Storage::StorageException& exception) { + // Flat namespace storage accounts with "soft delete" enabled return + // + // "Conflict - This endpoint does not support BlobStorageEvents + // or SoftDelete. [...]" [1], + // + // otherwise it returns: + // + // "BadRequest - This operation is only supported on a hierarchical namespace + // account." + // + // [1]: + // https://learn.microsoft.com/en-us/answers/questions/1069779/this-endpoint-does-not-support-blobstorageevents-o + switch (exception.StatusCode) { + case Http::HttpStatusCode::BadRequest: + case Http::HttpStatusCode::Conflict: + return HNSSupport::kDisabled; + case Http::HttpStatusCode::NotFound: + if (IsDfsEmulator(options)) { + return HNSSupport::kDisabled; + } + // Did we get an error because of the container not existing? + if (IsContainerNotFound(exception)) { + return HNSSupport::kContainerNotFound; + } + [[fallthrough]]; + default: + if (exception.ErrorCode == "HierarchicalNamespaceNotEnabled") { + return HNSSupport::kDisabled; + } + return ExceptionToStatus("Check for Hierarchical Namespace support on '" + + adlfs_client.GetUrl() + "' failed.", + exception); + } + } +} + +} // namespace internal + // ----------------------------------------------------------------------- // AzureFilesystem Implementation +namespace { + +// In Azure Storage terminology, a "container" and a "filesystem" are the same +// kind of object, but it can be accessed using different APIs. The Blob Storage +// API calls it a "container", the Data Lake Storage Gen 2 API calls it a +// "filesystem". 
Creating a container using the Blob Storage API will make it +// accessible using the Data Lake Storage Gen 2 API and vice versa. + +template +Result GetContainerPropsAsFileInfo(const std::string& container_name, + ContainerClient& container_client) { + FileInfo info{container_name}; + try { + auto properties = container_client.GetProperties(); + info.set_type(FileType::Directory); + info.set_mtime(std::chrono::system_clock::time_point{properties.Value.LastModified}); + return info; + } catch (const Storage::StorageException& exception) { + if (IsContainerNotFound(exception)) { + info.set_type(FileType::NotFound); + return info; + } + return ExceptionToStatus( + "GetProperties for '" + container_client.GetUrl() + "' failed.", exception); + } +} + +FileInfo DirectoryFileInfoFromPath(std::string_view path) { + return FileInfo{std::string{internal::RemoveTrailingSlash(path)}, FileType::Directory}; +} + +FileInfo FileInfoFromBlob(std::string_view container, + const Blobs::Models::BlobItem& blob) { + auto path = internal::ConcatAbstractPath(container, blob.Name); + if (internal::HasTrailingSlash(blob.Name)) { + return DirectoryFileInfoFromPath(path); + } + FileInfo info{std::move(path), FileType::File}; + info.set_size(blob.BlobSize); + info.set_mtime(std::chrono::system_clock::time_point{blob.Details.LastModified}); + return info; +} + +} // namespace + class AzureFileSystem::Impl { private: io::IOContext io_context_; @@ -798,7 +899,7 @@ class AzureFileSystem::Impl { std::unique_ptr datalake_service_client_; std::unique_ptr blob_service_client_; - internal::HierarchicalNamespaceDetector hns_detector_; + HNSSupport cached_hns_support_ = HNSSupport::kUnknown; Impl(AzureOptions options, io::IOContext io_context) : io_context_(std::move(io_context)), options_(std::move(options)) {} @@ -812,52 +913,54 @@ class AzureFileSystem::Impl { self->options_.MakeBlobServiceClient()); ARROW_ASSIGN_OR_RAISE(self->datalake_service_client_, self->options_.MakeDataLakeServiceClient()); - RETURN_NOT_OK(self->hns_detector_.Init(self->datalake_service_client_.get())); return self; } io::IOContext& io_context() { return io_context_; } const AzureOptions& options() const { return options_; } + private: + /// \brief Memoized version of CheckIfHierarchicalNamespaceIsEnabled. + /// + /// \return kEnabled/kDisabled/kContainerNotFound (kUnknown is never returned). + Result HierarchicalNamespaceSupport( + DataLake::DataLakeFileSystemClient& adlfs_client) { + switch (cached_hns_support_) { + case HNSSupport::kEnabled: + case HNSSupport::kDisabled: + return cached_hns_support_; + case HNSSupport::kUnknown: + case HNSSupport::kContainerNotFound: + // Try the check again because the support is still unknown or the container + // that didn't exist before may exist now. + break; + } + ARROW_ASSIGN_OR_RAISE( + cached_hns_support_, + internal::CheckIfHierarchicalNamespaceIsEnabled(adlfs_client, options_)); + DCHECK_NE(cached_hns_support_, HNSSupport::kUnknown); + // Caller should handle kContainerNotFound case appropriately. + return cached_hns_support_; + } + public: Result GetFileInfo(const AzureLocation& location) { - FileInfo info; - info.set_path(location.all); - if (location.container.empty()) { - // The location is invalid if the container is empty but the path is not. DCHECK(location.path.empty()); - // This location must be derived from the root path. FileInfo should describe it - // as a directory and there isn't any extra metadata to fetch. 
- info.set_type(FileType::Directory); - return info; + // Root directory of the storage account. + return FileInfo{"", FileType::Directory}; } if (location.path.empty()) { - // The location refers to a container. This is a directory if it exists. + // We have a container, but no path within the container. + // The container itself represents a directory. auto container_client = blob_service_client_->GetBlobContainerClient(location.container); - try { - auto properties = container_client.GetProperties(); - info.set_type(FileType::Directory); - info.set_mtime( - std::chrono::system_clock::time_point{properties.Value.LastModified}); - return info; - } catch (const Storage::StorageException& exception) { - if (IsContainerNotFound(exception)) { - info.set_type(FileType::NotFound); - return info; - } - return ExceptionToStatus( - "GetProperties for '" + container_client.GetUrl() + - "' failed with an unexpected Azure error. GetFileInfo is unable to " - "determine whether the container exists.", - exception); - } + return GetContainerPropsAsFileInfo(location.container, container_client); } - // There is a path to search within the container. - auto file_client = datalake_service_client_->GetFileSystemClient(location.container) - .GetFileClient(location.path); + FileInfo info{location.all}; + auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container); + auto file_client = adlfs_client.GetFileClient(location.path); try { auto properties = file_client.GetProperties(); if (properties.Value.IsDirectory) { @@ -879,11 +982,12 @@ class AzureFileSystem::Impl { return info; } catch (const Storage::StorageException& exception) { if (exception.StatusCode == Http::HttpStatusCode::NotFound) { - ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hns_detector_.Enabled(location.container)); - if (hierarchical_namespace_enabled) { - // If the hierarchical namespace is enabled, then the storage account will have - // explicit directories. Neither a file nor a directory was found. + ARROW_ASSIGN_OR_RAISE(auto hns_support, + HierarchicalNamespaceSupport(adlfs_client)); + if (hns_support == HNSSupport::kContainerNotFound || + hns_support == HNSSupport::kEnabled) { + // If the hierarchical namespace is enabled, then the storage account will + // have explicit directories. Neither a file nor a directory was found. info.set_type(FileType::NotFound); return info; } @@ -907,16 +1011,15 @@ class AzureFileSystem::Impl { return info; } catch (const Storage::StorageException& exception) { return ExceptionToStatus( - "ListBlobs for '" + *list_blob_options.Prefix + - "' failed with an unexpected Azure error. GetFileInfo is unable to " - "determine whether the path should be considered an implied directory.", + "ListBlobs failed for prefix='" + *list_blob_options.Prefix + + "' failed. GetFileInfo is unable to determine whether the path should " + "be considered an implied directory.", exception); } } return ExceptionToStatus( - "GetProperties for '" + file_client.GetUrl() + - "' failed with an unexpected " - "Azure error. 
GetFileInfo is unable to determine whether the path exists.", + "GetProperties failed for '" + file_client.GetUrl() + + "' GetFileInfo is unable to determine whether the path exists.", exception); } } @@ -940,23 +1043,6 @@ class AzureFileSystem::Impl { return Status::OK(); } - static FileInfo FileInfoFromBlob(std::string_view container, - const Blobs::Models::BlobItem& blob) { - auto path = internal::ConcatAbstractPath(container, blob.Name); - if (internal::HasTrailingSlash(blob.Name)) { - return DirectoryFileInfoFromPath(path); - } - FileInfo info{std::move(path), FileType::File}; - info.set_size(blob.BlobSize); - info.set_mtime(std::chrono::system_clock::time_point{blob.Details.LastModified}); - return info; - } - - static FileInfo DirectoryFileInfoFromPath(std::string_view path) { - return FileInfo{std::string{internal::RemoveTrailingSlash(path)}, - FileType::Directory}; - } - static std::string_view BasenameView(std::string_view s) { DCHECK(!internal::HasTrailingSlash(s)); auto offset = s.find_last_of(internal::kSep); @@ -1158,9 +1244,9 @@ class AzureFileSystem::Impl { return Status::Invalid("Cannot create an empty container"); } + auto container_client = + blob_service_client_->GetBlobContainerClient(location.container); if (location.path.empty()) { - auto container_client = - blob_service_client_->GetBlobContainerClient(location.container); try { auto response = container_client.Create(); if (response.Value.Created) { @@ -1177,18 +1263,25 @@ class AzureFileSystem::Impl { } } - ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hns_detector_.Enabled(location.container)); - if (!hierarchical_namespace_enabled) { + auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container); + ARROW_ASSIGN_OR_RAISE(auto hns_support, HierarchicalNamespaceSupport(adlfs_client)); + if (hns_support == HNSSupport::kContainerNotFound) { + return PathNotFound(location); + } + if (hns_support == HNSSupport::kDisabled) { + ARROW_ASSIGN_OR_RAISE( + auto container_info, + GetContainerPropsAsFileInfo(location.container, container_client)); + if (container_info.type() == FileType::NotFound) { + return PathNotFound(location); + } // Without hierarchical namespace enabled Azure blob storage has no directories. // Therefore we can't, and don't need to create one. Simply creating a blob with `/` // in the name implies directories. return Status::OK(); } - auto directory_client = - datalake_service_client_->GetFileSystemClient(location.container) - .GetDirectoryClient(location.path); + auto directory_client = adlfs_client.GetDirectoryClient(location.path); try { auto response = directory_client.Create(); if (response.Value.Created) { @@ -1219,19 +1312,19 @@ class AzureFileSystem::Impl { exception); } - ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hns_detector_.Enabled(location.container)); - if (!hierarchical_namespace_enabled) { + auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container); + ARROW_ASSIGN_OR_RAISE(auto hns_support, HierarchicalNamespaceSupport(adlfs_client)); + if (hns_support == HNSSupport::kDisabled) { // Without hierarchical namespace enabled Azure blob storage has no directories. // Therefore we can't, and don't need to create one. Simply creating a blob with `/` // in the name implies directories. return Status::OK(); } + // Don't handle HNSSupport::kContainerNotFound, just assume it still exists (because + // it was created above) and try to create the directory. 
if (!location.path.empty()) { - auto directory_client = - datalake_service_client_->GetFileSystemClient(location.container) - .GetDirectoryClient(location.path); + auto directory_client = adlfs_client.GetDirectoryClient(location.path); try { directory_client.CreateIfNotExists(); } catch (const Storage::StorageException& exception) { @@ -1344,6 +1437,12 @@ class AzureFileSystem::Impl { return Status::Invalid("Cannot delete an empty container"); } + auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container); + ARROW_ASSIGN_OR_RAISE(auto hns_support, HierarchicalNamespaceSupport(adlfs_client)); + if (hns_support == HNSSupport::kContainerNotFound) { + return PathNotFound(location); + } + if (location.path.empty()) { auto container_client = blob_service_client_->GetBlobContainerClient(location.container); @@ -1363,12 +1462,8 @@ class AzureFileSystem::Impl { } } - ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hns_detector_.Enabled(location.container)); - if (hierarchical_namespace_enabled) { - auto directory_client = - datalake_service_client_->GetFileSystemClient(location.container) - .GetDirectoryClient(location.path); + if (hns_support == HNSSupport::kEnabled) { + auto directory_client = adlfs_client.GetDirectoryClient(location.path); try { auto response = directory_client.DeleteRecursive(); if (response.Value.Deleted) { @@ -1394,19 +1489,20 @@ class AzureFileSystem::Impl { return internal::InvalidDeleteDirContents(location.all); } - ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled, - hns_detector_.Enabled(location.container)); - if (hierarchical_namespace_enabled) { - auto file_system_client = - datalake_service_client_->GetFileSystemClient(location.container); - auto directory_client = file_system_client.GetDirectoryClient(location.path); + auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container); + ARROW_ASSIGN_OR_RAISE(auto hns_support, HierarchicalNamespaceSupport(adlfs_client)); + if (hns_support == HNSSupport::kContainerNotFound) { + return missing_dir_ok ? 
Status::OK() : PathNotFound(location); + } + + if (hns_support == HNSSupport::kEnabled) { + auto directory_client = adlfs_client.GetDirectoryClient(location.path); try { auto list_response = directory_client.ListPaths(false); for (; list_response.HasPage(); list_response.MoveToNextPage()) { for (const auto& path : list_response.Paths) { if (path.IsDirectory) { - auto sub_directory_client = - file_system_client.GetDirectoryClient(path.Name); + auto sub_directory_client = adlfs_client.GetDirectoryClient(path.Name); try { sub_directory_client.DeleteRecursive(); } catch (const Storage::StorageException& exception) { @@ -1416,7 +1512,7 @@ class AzureFileSystem::Impl { exception); } } else { - auto sub_file_client = file_system_client.GetFileClient(path.Name); + auto sub_file_client = adlfs_client.GetFileClient(path.Name); try { sub_file_client.Delete(); } catch (const Storage::StorageException& exception) { diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 35c140b1097c7..b7ef2bb3130c2 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -38,8 +38,9 @@ class BlobServiceClient; } namespace Azure::Storage::Files::DataLake { +class DataLakeFileSystemClient; class DataLakeServiceClient; -} +} // namespace Azure::Storage::Files::DataLake namespace arrow::fs { @@ -117,6 +118,54 @@ struct ARROW_EXPORT AzureOptions { MakeDataLakeServiceClient() const; }; +namespace internal { + +enum class HNSSupport { + kUnknown = 0, + kContainerNotFound = 1, + kDisabled = 2, + kEnabled = 3, +}; + +/// \brief Performs a request to check if the storage account has Hierarchical +/// Namespace support enabled. +/// +/// This check requires a DataLakeFileSystemClient for any container of the +/// storage account. If the container doesn't exist yet, we just forward that +/// error to the caller (kContainerNotFound) since that's a proper error to the operation +/// on that container anyways -- no need to try again with or without the knowledge of +/// Hierarchical Namespace support. +/// +/// Hierarchical Namespace support can't easily be changed after the storage account is +/// created and the feature is shared by all containers in the storage account. +/// This means the result of this check can (and should!) be cached as soon as +/// it returns a successful result on any container of the storage account (see +/// AzureFileSystem::Impl). +/// +/// The check consists of a call to DataLakeFileSystemClient::GetAccessControlList() +/// on the root directory of the container. An approach taken by the Hadoop Azure +/// project [1]. A more obvious approach would be to call +/// BlobServiceClient::GetAccountInfo(), but that endpoint requires elevated +/// permissions [2] that we can't generally rely on. +/// +/// [1]: +/// https://github.com/apache/hadoop/blob/7c6af6a5f626d18d68b656d085cc23e4c1f7a1ef/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java#L356. +/// [2]: +/// https://learn.microsoft.com/en-us/rest/api/storageservices/get-blob-service-properties?tabs=azure-ad#authorization +/// +/// IMPORTANT: If the result is kEnabled or kDisabled, it doesn't necessarily mean that +/// the container exists. +/// +/// \param adlfs_client A DataLakeFileSystemClient for a container of the storage +/// account. +/// \return kEnabled/kDisabled/kContainerNotFound (kUnknown is never +/// returned). 
+Result CheckIfHierarchicalNamespaceIsEnabled( + Azure::Storage::Files::DataLake::DataLakeFileSystemClient& adlfs_client, + const AzureOptions& options); + +} // namespace internal + /// \brief FileSystem implementation backed by Azure Blob Storage (ABS) [1] and /// Azure Data Lake Storage Gen2 (ADLS Gen2) [2]. /// diff --git a/cpp/src/arrow/filesystem/azurefs_internal.cc b/cpp/src/arrow/filesystem/azurefs_internal.cc deleted file mode 100644 index 39c3fb23e3cfd..0000000000000 --- a/cpp/src/arrow/filesystem/azurefs_internal.cc +++ /dev/null @@ -1,94 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "arrow/filesystem/azurefs_internal.h" - -#include - -#include "arrow/result.h" - -namespace arrow::fs::internal { - -namespace { - -// TODO(GH-38772): Remove azurefs_internal.h/.cc by moving the detector to -// azurefs.cc (which contains a private copy of this helper function already). -Status ExceptionToStatus(const std::string& prefix, - const Azure::Storage::StorageException& exception) { - return Status::IOError(prefix, " Azure Error: ", exception.what()); -} - -} // namespace - -Status HierarchicalNamespaceDetector::Init( - Azure::Storage::Files::DataLake::DataLakeServiceClient* datalake_service_client) { - datalake_service_client_ = datalake_service_client; - return Status::OK(); -} - -Result HierarchicalNamespaceDetector::Enabled(const std::string& container_name) { - // Hierarchical namespace can't easily be changed after the storage account is created - // and its common across all containers in the storage account. Do nothing until we've - // checked for a cached result. - if (enabled_.has_value()) { - return enabled_.value(); - } - - // This approach is inspired by hadoop-azure - // https://github.com/apache/hadoop/blob/7c6af6a5f626d18d68b656d085cc23e4c1f7a1ef/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java#L356. - // Unfortunately `blob_service_client->GetAccountInfo()` requires significantly - // elevated permissions. - // https://learn.microsoft.com/en-us/rest/api/storageservices/get-blob-service-properties?tabs=azure-ad#authorization - auto filesystem_client = datalake_service_client_->GetFileSystemClient(container_name); - auto directory_client = filesystem_client.GetDirectoryClient("/"); - try { - directory_client.GetAccessControlList(); - enabled_ = true; - } catch (const Azure::Storage::StorageException& exception) { - // GetAccessControlList will fail on storage accounts without hierarchical - // namespace enabled. 
- - if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::BadRequest || - exception.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict) { - // Flat namespace storage accounts with soft delete enabled return - // Conflict - This endpoint does not support BlobStorageEvents or SoftDelete - // otherwise it returns: BadRequest - This operation is only supported on a - // hierarchical namespace account. - enabled_ = false; - } else if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { - // Azurite returns NotFound. - try { - filesystem_client.GetProperties(); - enabled_ = false; - } catch (const Azure::Storage::StorageException& exception) { - return ExceptionToStatus("Failed to confirm '" + filesystem_client.GetUrl() + - "' is an accessible container. Therefore the " - "hierarchical namespace check was invalid.", - exception); - } - } else { - return ExceptionToStatus( - "GetAccessControlList for '" + directory_client.GetUrl() + - "' failed with an unexpected Azure error, while checking " - "whether the storage account has hierarchical namespace enabled.", - exception); - } - } - return enabled_.value(); -} - -} // namespace arrow::fs::internal diff --git a/cpp/src/arrow/filesystem/azurefs_internal.h b/cpp/src/arrow/filesystem/azurefs_internal.h deleted file mode 100644 index 92592cf164f5a..0000000000000 --- a/cpp/src/arrow/filesystem/azurefs_internal.h +++ /dev/null @@ -1,39 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include - -#include - -#include "arrow/result.h" - -namespace arrow::fs::internal { - -class HierarchicalNamespaceDetector { - public: - Status Init( - Azure::Storage::Files::DataLake::DataLakeServiceClient* datalake_service_client); - Result Enabled(const std::string& container_name); - - private: - Azure::Storage::Files::DataLake::DataLakeServiceClient* datalake_service_client_; - std::optional enabled_; -}; - -} // namespace arrow::fs::internal diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 8a39c4c554897..db0e133e0d453 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -34,7 +34,6 @@ #include #include "arrow/filesystem/azurefs.h" -#include "arrow/filesystem/azurefs_internal.h" #include #include @@ -520,7 +519,8 @@ class TestAzureFileSystem : public ::testing::Test { // Tests that are called from more than one implementation of TestAzureFileSystem - void TestDetectHierarchicalNamespace(); + void TestDetectHierarchicalNamespace(bool trip_up_azurite); + void TestDetectHierarchicalNamespaceOnMissingContainer(); void TestGetFileInfoObject(); void TestGetFileInfoObjectWithNestedStructure(); @@ -610,14 +610,49 @@ class TestAzureFileSystem : public ::testing::Test { } }; -void TestAzureFileSystem::TestDetectHierarchicalNamespace() { - // Check the environments are implemented and injected here correctly. - auto expected = WithHierarchicalNamespace(); +void TestAzureFileSystem::TestDetectHierarchicalNamespace(bool trip_up_azurite) { + EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv()); + if (trip_up_azurite && env->backend() != AzureBackend::kAzurite) { + GTEST_SKIP() << "trip_up_azurite=true is only for Azurite."; + } auto data = SetUpPreexistingData(); - auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); - ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_OK_AND_EQ(expected, hierarchical_namespace.Enabled(data.container_name)); + if (trip_up_azurite) { + // Azurite causes GetDirectoryClient("/") to throw a std::out_of_range + // exception when a "/" blob exists, so we exercise that code path. 
+ auto container_client = + blob_service_client_->GetBlobContainerClient(data.container_name); + CreateBlob(container_client, "/"); + } + + auto adlfs_client = datalake_service_client_->GetFileSystemClient(data.container_name); + ASSERT_OK_AND_ASSIGN(auto hns_support, internal::CheckIfHierarchicalNamespaceIsEnabled( + adlfs_client, options_)); + if (env->WithHierarchicalNamespace()) { + ASSERT_EQ(hns_support, internal::HNSSupport::kEnabled); + } else { + ASSERT_EQ(hns_support, internal::HNSSupport::kDisabled); + } +} + +void TestAzureFileSystem::TestDetectHierarchicalNamespaceOnMissingContainer() { + auto container_name = PreexistingData::RandomContainerName(rng_); + auto adlfs_client = datalake_service_client_->GetFileSystemClient(container_name); + ASSERT_OK_AND_ASSIGN(auto hns_support, internal::CheckIfHierarchicalNamespaceIsEnabled( + adlfs_client, options_)); + EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv()); + switch (env->backend()) { + case AzureBackend::kAzurite: + ASSERT_EQ(hns_support, internal::HNSSupport::kDisabled); + break; + case AzureBackend::kAzure: + if (env->WithHierarchicalNamespace()) { + ASSERT_EQ(hns_support, internal::HNSSupport::kContainerNotFound); + } else { + ASSERT_EQ(hns_support, internal::HNSSupport::kDisabled); + } + break; + } } void TestAzureFileSystem::TestGetFileInfoObject() { @@ -733,7 +768,12 @@ using AllEnvironments = TYPED_TEST_SUITE(AzureFileSystemTestOnAllEnvs, AllEnvironments); TYPED_TEST(AzureFileSystemTestOnAllEnvs, DetectHierarchicalNamespace) { - this->TestDetectHierarchicalNamespace(); + this->TestDetectHierarchicalNamespace(true); + this->TestDetectHierarchicalNamespace(false); +} + +TYPED_TEST(AzureFileSystemTestOnAllEnvs, DetectHierarchicalNamespaceOnMissingContainer) { + this->TestDetectHierarchicalNamespaceOnMissingContainer(); } TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObject) { @@ -817,12 +857,6 @@ TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsFailureNonexistent) { // Tests using Azurite (the local Azure emulator) -TEST_F(TestAzuriteFileSystem, DetectHierarchicalNamespaceFailsWithMissingContainer) { - auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); - ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_RAISES(IOError, hierarchical_namespace.Enabled("nonexistent-container")); -} - TEST_F(TestAzuriteFileSystem, GetFileInfoAccount) { AssertFileInfo(fs_.get(), "", FileType::Directory); From 708b9733fc0797dabec75cbaa3d4564ffd483fef Mon Sep 17 00:00:00 2001 From: Bob Plotts Date: Wed, 20 Dec 2023 12:21:33 -0500 Subject: [PATCH 079/570] GH-39288: [Java][FlightSQL] Update Apache Avatica to version 1.24.0 (#39325) Updated pom files, and updated several failing tests because UsernamePasswordCredentials() method has been removed from Avatica. 
* Closes: #39288 Authored-by: Bob Plotts Signed-off-by: David Li --- java/flight/flight-sql-jdbc-core/pom.xml | 2 +- .../driver/jdbc/ConnectionMutualTlsTest.java | 19 ++++++------------- .../jdbc/ConnectionTlsRootCertsTest.java | 7 ++----- .../arrow/driver/jdbc/ConnectionTlsTest.java | 13 ++++--------- java/flight/flight-sql-jdbc-driver/pom.xml | 2 +- 5 files changed, 14 insertions(+), 29 deletions(-) diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml index 74a2f8d320f37..6c577954f8fc5 100644 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ b/java/flight/flight-sql-jdbc-core/pom.xml @@ -128,7 +128,7 @@ org.apache.calcite.avatica avatica - 1.18.0 + 1.24.0 diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionMutualTlsTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionMutualTlsTest.java index 927b3e426c6ba..cc44cc57be9b3 100644 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionMutualTlsTest.java +++ b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionMutualTlsTest.java @@ -36,7 +36,6 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.util.AutoCloseables; -import org.apache.calcite.avatica.org.apache.http.auth.UsernamePasswordCredentials; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -112,15 +111,13 @@ public void tearDown() throws Exception { */ @Test public void testGetEncryptedClientAuthenticated() throws Exception { - final UsernamePasswordCredentials credentials = new UsernamePasswordCredentials( - userTest, passTest); try (ArrowFlightSqlClientHandler client = new ArrowFlightSqlClientHandler.Builder() .withHost(FLIGHT_SERVER_TEST_RULE.getHost()) .withPort(FLIGHT_SERVER_TEST_RULE.getPort()) - .withUsername(credentials.getUserName()) - .withPassword(credentials.getPassword()) + .withUsername(userTest) + .withPassword(passTest) .withTlsRootCertificates(tlsRootCertsPath) .withClientCertificate(clientMTlsCertPath) .withClientKey(clientMTlsKeyPath) @@ -137,15 +134,13 @@ public void testGetEncryptedClientAuthenticated() throws Exception { */ @Test public void testGetEncryptedClientWithBadMTlsCertPath() { - final UsernamePasswordCredentials credentials = new UsernamePasswordCredentials( - userTest, passTest); assertThrows(SQLException.class, () -> { try (ArrowFlightSqlClientHandler handler = new ArrowFlightSqlClientHandler.Builder() .withHost(FLIGHT_SERVER_TEST_RULE.getHost()) .withPort(FLIGHT_SERVER_TEST_RULE.getPort()) - .withUsername(credentials.getUserName()) - .withPassword(credentials.getPassword()) + .withUsername(userTest) + .withPassword(passTest) .withTlsRootCertificates(tlsRootCertsPath) .withClientCertificate(badClientMTlsCertPath) .withClientKey(clientMTlsKeyPath) @@ -163,15 +158,13 @@ public void testGetEncryptedClientWithBadMTlsCertPath() { */ @Test public void testGetEncryptedClientWithBadMTlsKeyPath() { - final UsernamePasswordCredentials credentials = new UsernamePasswordCredentials( - userTest, passTest); assertThrows(SQLException.class, () -> { try (ArrowFlightSqlClientHandler handler = new ArrowFlightSqlClientHandler.Builder() .withHost(FLIGHT_SERVER_TEST_RULE.getHost()) .withPort(FLIGHT_SERVER_TEST_RULE.getPort()) - .withUsername(credentials.getUserName()) - .withPassword(credentials.getPassword()) + .withUsername(userTest) + .withPassword(passTest) 
.withTlsRootCertificates(tlsRootCertsPath) .withClientCertificate(clientMTlsCertPath) .withClientKey(badClientMTlsKeyPath) diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsRootCertsTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsRootCertsTest.java index 5579cf0cf5f54..e5ffc2bcf79c8 100644 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsRootCertsTest.java +++ b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsRootCertsTest.java @@ -35,7 +35,6 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.util.AutoCloseables; -import org.apache.calcite.avatica.org.apache.http.auth.UsernamePasswordCredentials; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -95,15 +94,13 @@ public void tearDown() throws Exception { */ @Test public void testGetEncryptedClientAuthenticated() throws Exception { - final UsernamePasswordCredentials credentials = new UsernamePasswordCredentials( - userTest, passTest); try (ArrowFlightSqlClientHandler client = new ArrowFlightSqlClientHandler.Builder() .withHost(FLIGHT_SERVER_TEST_RULE.getHost()) .withPort(FLIGHT_SERVER_TEST_RULE.getPort()) - .withUsername(credentials.getUserName()) - .withPassword(credentials.getPassword()) + .withUsername(userTest) + .withPassword(passTest) .withTlsRootCertificates(tlsRootCertsPath) .withBufferAllocator(allocator) .withEncryption(true) diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsTest.java index 7e160f3f0c385..f5a7b68e06cd8 100644 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsTest.java +++ b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/ConnectionTlsTest.java @@ -36,7 +36,6 @@ import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.util.AutoCloseables; import org.apache.arrow.util.Preconditions; -import org.apache.calcite.avatica.org.apache.http.auth.UsernamePasswordCredentials; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -96,15 +95,13 @@ public void tearDown() throws Exception { */ @Test public void testGetEncryptedClientAuthenticatedWithDisableCertVerification() throws Exception { - final UsernamePasswordCredentials credentials = new UsernamePasswordCredentials( - userTest, passTest); try (ArrowFlightSqlClientHandler client = new ArrowFlightSqlClientHandler.Builder() .withHost(FLIGHT_SERVER_TEST_RULE.getHost()) .withPort(FLIGHT_SERVER_TEST_RULE.getPort()) - .withUsername(credentials.getUserName()) - .withPassword(credentials.getPassword()) + .withUsername(userTest) + .withPassword(passTest) .withDisableCertificateVerification(true) .withBufferAllocator(allocator) .withEncryption(true) @@ -120,16 +117,14 @@ public void testGetEncryptedClientAuthenticatedWithDisableCertVerification() thr */ @Test public void testGetEncryptedClientAuthenticated() throws Exception { - final UsernamePasswordCredentials credentials = new UsernamePasswordCredentials( - userTest, passTest); try (ArrowFlightSqlClientHandler client = new ArrowFlightSqlClientHandler.Builder() .withHost(FLIGHT_SERVER_TEST_RULE.getHost()) .withPort(FLIGHT_SERVER_TEST_RULE.getPort()) .withSystemTrustStore(false) - 
.withUsername(credentials.getUserName()) - .withPassword(credentials.getPassword()) + .withUsername(userTest) + .withPassword(passTest) .withTrustStorePath(trustStorePath) .withTrustStorePassword(trustStorePass) .withBufferAllocator(allocator) diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml index d4ef1b4ea3b9b..a8e55ea4089d5 100644 --- a/java/flight/flight-sql-jdbc-driver/pom.xml +++ b/java/flight/flight-sql-jdbc-driver/pom.xml @@ -114,7 +114,7 @@ org.apache.calcite.avatica avatica - 1.18.0 + 1.24.0 runtime From 91b2243e2753bb1a4ccd645dd41d74b1d0b077c0 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Wed, 20 Dec 2023 15:28:29 -0300 Subject: [PATCH 080/570] GH-39322: [C++] Forward arguments to ExceptionToStatus all the way to Status::FromArgs (#39323) ### Rationale for this change This simplifies the creation of long error messages and leads to the use of a string builder to construct the error message. ### What changes are included in this PR? - std::forward in ExceptionToStatus - A few nitpicky changes - Simplification of the error message text - Moving the signature of `CheckIfHierarchicalNamespaceIsEnabled` to `azurefs_internal.h` to reduce the size of `azurefs.h` -- implementation remains in `azurefs.cc` ### Are these changes tested? Yes. By existing tests. * Closes: #39322 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/filesystem/azurefs.cc | 195 +++++++++----------- cpp/src/arrow/filesystem/azurefs.h | 48 ----- cpp/src/arrow/filesystem/azurefs_internal.h | 78 ++++++++ cpp/src/arrow/filesystem/azurefs_test.cc | 13 +- 4 files changed, 177 insertions(+), 157 deletions(-) create mode 100644 cpp/src/arrow/filesystem/azurefs_internal.h diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 032cd034e7abb..a9795e40a6ce8 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -16,6 +16,7 @@ // under the License. #include "arrow/filesystem/azurefs.h" +#include "arrow/filesystem/azurefs_internal.h" #include #include @@ -41,7 +42,7 @@ namespace DataLake = Azure::Storage::Files::DataLake; namespace Http = Azure::Core::Http; namespace Storage = Azure::Storage; -using internal::HNSSupport; +using HNSSupport = internal::HierarchicalNamespaceSupport; // ----------------------------------------------------------------------- // AzureOptions Implementation @@ -217,9 +218,11 @@ struct AzureLocation { } }; -Status ExceptionToStatus(const std::string& prefix, - const Azure::Storage::StorageException& exception) { - return Status::IOError(prefix, " Azure Error: ", exception.what()); +template +Status ExceptionToStatus(const Storage::StorageException& exception, + PrefixArgs&&... 
prefix_args) { + return Status::IOError(std::forward(prefix_args)..., + " Azure Error: ", exception.what()); } Status PathNotFound(const AzureLocation& location) { @@ -418,6 +421,15 @@ std::shared_ptr PropertiesToMetadata( return metadata; } +Storage::Metadata ArrowMetadataToAzureMetadata( + const std::shared_ptr& arrow_metadata) { + Storage::Metadata azure_metadata; + for (auto key_value : arrow_metadata->sorted_pairs()) { + azure_metadata[key_value.first] = key_value.second; + } + return azure_metadata; +} + class ObjectInputFile final : public io::RandomAccessFile { public: ObjectInputFile(std::shared_ptr blob_client, @@ -443,9 +455,8 @@ class ObjectInputFile final : public io::RandomAccessFile { return PathNotFound(location_); } return ExceptionToStatus( - "GetProperties failed for '" + blob_client_->GetUrl() + - "'. Cannot initialise an ObjectInputFile without knowing the file size.", - exception); + exception, "GetProperties failed for '", blob_client_->GetUrl(), + "'. Cannot initialise an ObjectInputFile without knowing the file size."); } } @@ -523,10 +534,9 @@ class ObjectInputFile final : public io::RandomAccessFile { .Value.ContentRange.Length.Value(); } catch (const Storage::StorageException& exception) { return ExceptionToStatus( - "DownloadTo from '" + blob_client_->GetUrl() + "' at position " + - std::to_string(position) + " for " + std::to_string(nbytes) + - " bytes failed. ReadAt failed to read the required byte range.", - exception); + exception, "DownloadTo from '", blob_client_->GetUrl(), "' at position ", + position, " for ", nbytes, + " bytes failed. ReadAt failed to read the required byte range."); } } @@ -571,15 +581,14 @@ class ObjectInputFile final : public io::RandomAccessFile { std::shared_ptr metadata_; }; -Status CreateEmptyBlockBlob(std::shared_ptr block_blob_client) { +Status CreateEmptyBlockBlob(const Blobs::BlockBlobClient& block_blob_client) { try { - block_blob_client->UploadFrom(nullptr, 0); + block_blob_client.UploadFrom(nullptr, 0); } catch (const Storage::StorageException& exception) { return ExceptionToStatus( - "UploadFrom failed for '" + block_blob_client->GetUrl() + - "'. There is no existing blob at this location or the existing blob must be " - "replaced so ObjectAppendStream must create a new empty block blob.", - exception); + exception, "UploadFrom failed for '", block_blob_client.GetUrl(), + "'. There is no existing blob at this location or the existing blob must be " + "replaced so ObjectAppendStream must create a new empty block blob."); } return Status::OK(); } @@ -590,19 +599,9 @@ Result GetBlockList( return block_blob_client->GetBlockList().Value; } catch (Storage::StorageException& exception) { return ExceptionToStatus( - "GetBlockList failed for '" + block_blob_client->GetUrl() + - "'. Cannot write to a file without first fetching the existing block list.", - exception); - } -} - -Storage::Metadata ArrowMetadataToAzureMetadata( - const std::shared_ptr& arrow_metadata) { - Storage::Metadata azure_metadata; - for (auto key_value : arrow_metadata->sorted_pairs()) { - azure_metadata[key_value.first] = key_value.second; + exception, "GetBlockList failed for '", block_blob_client->GetUrl(), + "'. 
Cannot write to a file without first fetching the existing block list."); } - return azure_metadata; } Status CommitBlockList(std::shared_ptr block_blob_client, @@ -618,9 +617,8 @@ Status CommitBlockList(std::shared_ptr block_bl block_blob_client->CommitBlockList(block_ids, options); } catch (const Storage::StorageException& exception) { return ExceptionToStatus( - "CommitBlockList failed for '" + block_blob_client->GetUrl() + - "'. Committing is required to flush an output/append stream.", - exception); + exception, "CommitBlockList failed for '", block_blob_client->GetUrl(), + "'. Committing is required to flush an output/append stream."); } return Status::OK(); } @@ -659,13 +657,12 @@ class ObjectAppendStream final : public io::OutputStream { pos_ = content_length_; } catch (const Storage::StorageException& exception) { if (exception.StatusCode == Http::HttpStatusCode::NotFound) { - RETURN_NOT_OK(CreateEmptyBlockBlob(block_blob_client_)); + RETURN_NOT_OK(CreateEmptyBlockBlob(*block_blob_client_)); } else { return ExceptionToStatus( - "GetProperties failed for '" + block_blob_client_->GetUrl() + - "'. Cannot initialise an ObjectAppendStream without knowing whether a " - "file already exists at this path, and if it exists, its size.", - exception); + exception, "GetProperties failed for '", block_blob_client_->GetUrl(), + "'. Cannot initialise an ObjectAppendStream without knowing whether a " + "file already exists at this path, and if it exists, its size."); } content_length_ = 0; } @@ -760,10 +757,9 @@ class ObjectAppendStream final : public io::OutputStream { block_blob_client_->StageBlock(new_block_id, block_content); } catch (const Storage::StorageException& exception) { return ExceptionToStatus( - "StageBlock failed for '" + block_blob_client_->GetUrl() + "' new_block_id: '" + - new_block_id + - "'. Staging new blocks is fundamental to streaming writes to blob storage.", - exception); + exception, "StageBlock failed for '", block_blob_client_->GetUrl(), + "' new_block_id: '", new_block_id, + "'. Staging new blocks is fundamental to streaming writes to blob storage."); } block_ids_.push_back(new_block_id); pos_ += nbytes; @@ -835,9 +831,9 @@ Result CheckIfHierarchicalNamespaceIsEnabled( if (exception.ErrorCode == "HierarchicalNamespaceNotEnabled") { return HNSSupport::kDisabled; } - return ExceptionToStatus("Check for Hierarchical Namespace support on '" + - adlfs_client.GetUrl() + "' failed.", - exception); + return ExceptionToStatus(exception, + "Check for Hierarchical Namespace support on '", + adlfs_client.GetUrl(), "' failed."); } } } @@ -855,6 +851,8 @@ namespace { // "filesystem". Creating a container using the Blob Storage API will make it // accessible using the Data Lake Storage Gen 2 API and vice versa. +const char kDelimiter[] = {internal::kSep, '\0'}; + template Result GetContainerPropsAsFileInfo(const std::string& container_name, ContainerClient& container_client) { @@ -869,8 +867,8 @@ Result GetContainerPropsAsFileInfo(const std::string& container_name, info.set_type(FileType::NotFound); return info; } - return ExceptionToStatus( - "GetProperties for '" + container_client.GetUrl() + "' failed.", exception); + return ExceptionToStatus(exception, "GetProperties for '", container_client.GetUrl(), + "' failed."); } } @@ -1011,16 +1009,14 @@ class AzureFileSystem::Impl { return info; } catch (const Storage::StorageException& exception) { return ExceptionToStatus( - "ListBlobs failed for prefix='" + *list_blob_options.Prefix + - "' failed. 
GetFileInfo is unable to determine whether the path should " - "be considered an implied directory.", - exception); + exception, "ListBlobs failed for prefix='", *list_blob_options.Prefix, + "' failed. GetFileInfo is unable to determine whether the path should " + "be considered an implied directory."); } } return ExceptionToStatus( - "GetProperties failed for '" + file_client.GetUrl() + - "' GetFileInfo is unable to determine whether the path exists.", - exception); + exception, "GetProperties failed for '", file_client.GetUrl(), + "' GetFileInfo is unable to determine whether the path exists."); } } @@ -1038,7 +1034,7 @@ class AzureFileSystem::Impl { } } } catch (const Storage::StorageException& exception) { - return ExceptionToStatus("Failed to list account containers.", exception); + return ExceptionToStatus(exception, "Failed to list account containers."); } return Status::OK(); } @@ -1153,9 +1149,9 @@ class AzureFileSystem::Impl { if (IsContainerNotFound(exception)) { found = false; } else { - return ExceptionToStatus("Failed to list blobs in a directory: " + - select.base_dir + ": " + container_client.GetUrl(), - exception); + return ExceptionToStatus(exception, + "Failed to list blobs in a directory: ", select.base_dir, + ": ", container_client.GetUrl()); } } @@ -1241,7 +1237,7 @@ class AzureFileSystem::Impl { Status CreateDir(const AzureLocation& location) { if (location.container.empty()) { - return Status::Invalid("Cannot create an empty container"); + return Status::Invalid("CreateDir requires a non-empty path."); } auto container_client = @@ -1249,17 +1245,13 @@ class AzureFileSystem::Impl { if (location.path.empty()) { try { auto response = container_client.Create(); - if (response.Value.Created) { - return Status::OK(); - } else { - return StatusFromErrorResponse( - container_client.GetUrl(), *response.RawResponse, - "Failed to create a container: " + location.container); - } + return response.Value.Created + ? 
Status::OK() + : Status::AlreadyExists("Directory already exists: " + location.all); } catch (const Storage::StorageException& exception) { - return ExceptionToStatus("Failed to create a container: " + location.container + - ": " + container_client.GetUrl(), - exception); + return ExceptionToStatus(exception, + "Failed to create a container: ", location.container, + ": ", container_client.GetUrl()); } } @@ -1291,15 +1283,14 @@ class AzureFileSystem::Impl { "Failed to create a directory: " + location.path); } } catch (const Storage::StorageException& exception) { - return ExceptionToStatus("Failed to create a directory: " + location.path + ": " + - directory_client.GetUrl(), - exception); + return ExceptionToStatus(exception, "Failed to create a directory: ", location.path, + ": ", directory_client.GetUrl()); } } Status CreateDirRecursive(const AzureLocation& location) { if (location.container.empty()) { - return Status::Invalid("Cannot create an empty container"); + return Status::Invalid("CreateDir requires a non-empty path."); } auto container_client = @@ -1307,9 +1298,9 @@ class AzureFileSystem::Impl { try { container_client.CreateIfNotExists(); } catch (const Storage::StorageException& exception) { - return ExceptionToStatus("Failed to create a container: " + location.container + - " (" + container_client.GetUrl() + ")", - exception); + return ExceptionToStatus(exception, + "Failed to create a container: ", location.container, " (", + container_client.GetUrl(), ")"); } auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container); @@ -1328,9 +1319,9 @@ class AzureFileSystem::Impl { try { directory_client.CreateIfNotExists(); } catch (const Storage::StorageException& exception) { - return ExceptionToStatus("Failed to create a directory: " + location.path + " (" + - directory_client.GetUrl() + ")", - exception); + return ExceptionToStatus(exception, + "Failed to create a directory: ", location.path, " (", + directory_client.GetUrl(), ")"); } } @@ -1349,7 +1340,7 @@ class AzureFileSystem::Impl { std::shared_ptr stream; if (truncate) { - RETURN_NOT_OK(CreateEmptyBlockBlob(block_blob_client)); + RETURN_NOT_OK(CreateEmptyBlockBlob(*block_blob_client)); stream = std::make_shared(block_blob_client, fs->io_context(), location, metadata, options_, 0); } else { @@ -1393,9 +1384,8 @@ class AzureFileSystem::Impl { try { container_client.SubmitBatch(batch); } catch (const Storage::StorageException& exception) { - return ExceptionToStatus("Failed to delete blobs in a directory: " + - location.path + ": " + container_client.GetUrl(), - exception); + return ExceptionToStatus(exception, "Failed to delete blobs in a directory: ", + location.path, ": ", container_client.GetUrl()); } std::vector failed_blob_names; for (size_t i = 0; i < deferred_responses.size(); ++i) { @@ -1424,9 +1414,9 @@ class AzureFileSystem::Impl { } } } catch (const Storage::StorageException& exception) { - return ExceptionToStatus("Failed to list blobs in a directory: " + location.path + - ": " + container_client.GetUrl(), - exception); + return ExceptionToStatus(exception, + "Failed to list blobs in a directory: ", location.path, + ": ", container_client.GetUrl()); } return Status::OK(); } @@ -1434,7 +1424,7 @@ class AzureFileSystem::Impl { public: Status DeleteDir(const AzureLocation& location) { if (location.container.empty()) { - return Status::Invalid("Cannot delete an empty container"); + return Status::Invalid("DeleteDir requires a non-empty path."); } auto adlfs_client = 
datalake_service_client_->GetFileSystemClient(location.container); @@ -1456,9 +1446,9 @@ class AzureFileSystem::Impl { "Failed to delete a container: " + location.container); } } catch (const Storage::StorageException& exception) { - return ExceptionToStatus("Failed to delete a container: " + location.container + - ": " + container_client.GetUrl(), - exception); + return ExceptionToStatus(exception, + "Failed to delete a container: ", location.container, + ": ", container_client.GetUrl()); } } @@ -1474,9 +1464,9 @@ class AzureFileSystem::Impl { "Failed to delete a directory: " + location.path); } } catch (const Storage::StorageException& exception) { - return ExceptionToStatus("Failed to delete a directory: " + location.path + ": " + - directory_client.GetUrl(), - exception); + return ExceptionToStatus(exception, + "Failed to delete a directory: ", location.path, ": ", + directory_client.GetUrl()); } } else { return DeleteDirContentsWithoutHierarchicalNamespace(location, @@ -1507,9 +1497,8 @@ class AzureFileSystem::Impl { sub_directory_client.DeleteRecursive(); } catch (const Storage::StorageException& exception) { return ExceptionToStatus( - "Failed to delete a sub directory: " + location.container + - internal::kSep + path.Name + ": " + sub_directory_client.GetUrl(), - exception); + exception, "Failed to delete a sub directory: ", location.container, + kDelimiter, path.Name, ": ", sub_directory_client.GetUrl()); } } else { auto sub_file_client = adlfs_client.GetFileClient(path.Name); @@ -1517,9 +1506,8 @@ class AzureFileSystem::Impl { sub_file_client.Delete(); } catch (const Storage::StorageException& exception) { return ExceptionToStatus( - "Failed to delete a sub file: " + location.container + - internal::kSep + path.Name + ": " + sub_file_client.GetUrl(), - exception); + exception, "Failed to delete a sub file: ", location.container, + kDelimiter, path.Name, ": ", sub_file_client.GetUrl()); } } } @@ -1528,9 +1516,9 @@ class AzureFileSystem::Impl { if (missing_dir_ok && exception.StatusCode == Http::HttpStatusCode::NotFound) { return Status::OK(); } else { - return ExceptionToStatus("Failed to delete directory contents: " + - location.path + ": " + directory_client.GetUrl(), - exception); + return ExceptionToStatus(exception, + "Failed to delete directory contents: ", location.path, + ": ", directory_client.GetUrl()); } } return Status::OK(); @@ -1553,9 +1541,8 @@ class AzureFileSystem::Impl { try { dest_blob_client.CopyFromUri(src_url); } catch (const Storage::StorageException& exception) { - return ExceptionToStatus( - "Failed to copy a blob. (" + src_url + " -> " + dest_blob_client.GetUrl() + ")", - exception); + return ExceptionToStatus(exception, "Failed to copy a blob. (", src_url, " -> ", + dest_blob_client.GetUrl(), ")"); } return Status::OK(); } diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index b7ef2bb3130c2..0c41c42928121 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -118,54 +118,6 @@ struct ARROW_EXPORT AzureOptions { MakeDataLakeServiceClient() const; }; -namespace internal { - -enum class HNSSupport { - kUnknown = 0, - kContainerNotFound = 1, - kDisabled = 2, - kEnabled = 3, -}; - -/// \brief Performs a request to check if the storage account has Hierarchical -/// Namespace support enabled. -/// -/// This check requires a DataLakeFileSystemClient for any container of the -/// storage account. 
If the container doesn't exist yet, we just forward that -/// error to the caller (kContainerNotFound) since that's a proper error to the operation -/// on that container anyways -- no need to try again with or without the knowledge of -/// Hierarchical Namespace support. -/// -/// Hierarchical Namespace support can't easily be changed after the storage account is -/// created and the feature is shared by all containers in the storage account. -/// This means the result of this check can (and should!) be cached as soon as -/// it returns a successful result on any container of the storage account (see -/// AzureFileSystem::Impl). -/// -/// The check consists of a call to DataLakeFileSystemClient::GetAccessControlList() -/// on the root directory of the container. An approach taken by the Hadoop Azure -/// project [1]. A more obvious approach would be to call -/// BlobServiceClient::GetAccountInfo(), but that endpoint requires elevated -/// permissions [2] that we can't generally rely on. -/// -/// [1]: -/// https://github.com/apache/hadoop/blob/7c6af6a5f626d18d68b656d085cc23e4c1f7a1ef/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java#L356. -/// [2]: -/// https://learn.microsoft.com/en-us/rest/api/storageservices/get-blob-service-properties?tabs=azure-ad#authorization -/// -/// IMPORTANT: If the result is kEnabled or kDisabled, it doesn't necessarily mean that -/// the container exists. -/// -/// \param adlfs_client A DataLakeFileSystemClient for a container of the storage -/// account. -/// \return kEnabled/kDisabled/kContainerNotFound (kUnknown is never -/// returned). -Result CheckIfHierarchicalNamespaceIsEnabled( - Azure::Storage::Files::DataLake::DataLakeFileSystemClient& adlfs_client, - const AzureOptions& options); - -} // namespace internal - /// \brief FileSystem implementation backed by Azure Blob Storage (ABS) [1] and /// Azure Data Lake Storage Gen2 (ADLS Gen2) [2]. /// diff --git a/cpp/src/arrow/filesystem/azurefs_internal.h b/cpp/src/arrow/filesystem/azurefs_internal.h new file mode 100644 index 0000000000000..13d84c9b542b4 --- /dev/null +++ b/cpp/src/arrow/filesystem/azurefs_internal.h @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/result.h" + +namespace Azure::Storage::Files::DataLake { +class DataLakeFileSystemClient; +class DataLakeServiceClient; +} // namespace Azure::Storage::Files::DataLake + +namespace arrow::fs { + +struct AzureOptions; + +namespace internal { + +enum class HierarchicalNamespaceSupport { + kUnknown = 0, + kContainerNotFound = 1, + kDisabled = 2, + kEnabled = 3, +}; + +/// \brief Performs a request to check if the storage account has Hierarchical +/// Namespace support enabled. 
+/// +/// This check requires a DataLakeFileSystemClient for any container of the +/// storage account. If the container doesn't exist yet, we just forward that +/// error to the caller (kContainerNotFound) since that's a proper error to the operation +/// on that container anyways -- no need to try again with or without the knowledge of +/// Hierarchical Namespace support. +/// +/// Hierarchical Namespace support can't easily be changed after the storage account is +/// created and the feature is shared by all containers in the storage account. +/// This means the result of this check can (and should!) be cached as soon as +/// it returns a successful result on any container of the storage account (see +/// AzureFileSystem::Impl). +/// +/// The check consists of a call to DataLakeFileSystemClient::GetAccessControlList() +/// on the root directory of the container. An approach taken by the Hadoop Azure +/// project [1]. A more obvious approach would be to call +/// BlobServiceClient::GetAccountInfo(), but that endpoint requires elevated +/// permissions [2] that we can't generally rely on. +/// +/// [1]: +/// https://github.com/apache/hadoop/blob/7c6af6a5f626d18d68b656d085cc23e4c1f7a1ef/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java#L356. +/// [2]: +/// https://learn.microsoft.com/en-us/rest/api/storageservices/get-blob-service-properties?tabs=azure-ad#authorization +/// +/// IMPORTANT: If the result is kEnabled or kDisabled, it doesn't necessarily mean that +/// the container exists. +/// +/// \param adlfs_client A DataLakeFileSystemClient for a container of the storage +/// account. +/// \return kEnabled/kDisabled/kContainerNotFound (kUnknown is never +/// returned). +Result CheckIfHierarchicalNamespaceIsEnabled( + Azure::Storage::Files::DataLake::DataLakeFileSystemClient& adlfs_client, + const arrow::fs::AzureOptions& options); + +} // namespace internal +} // namespace arrow::fs diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index db0e133e0d453..53e71f3658dd9 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -34,6 +34,7 @@ #include #include "arrow/filesystem/azurefs.h" +#include "arrow/filesystem/azurefs_internal.h" #include #include @@ -72,6 +73,8 @@ namespace Blobs = Azure::Storage::Blobs; namespace Core = Azure::Core; namespace DataLake = Azure::Storage::Files::DataLake; +using HNSSupport = internal::HierarchicalNamespaceSupport; + enum class AzureBackend { /// \brief Official Azure Remote Backend kAzure, @@ -629,9 +632,9 @@ void TestAzureFileSystem::TestDetectHierarchicalNamespace(bool trip_up_azurite) ASSERT_OK_AND_ASSIGN(auto hns_support, internal::CheckIfHierarchicalNamespaceIsEnabled( adlfs_client, options_)); if (env->WithHierarchicalNamespace()) { - ASSERT_EQ(hns_support, internal::HNSSupport::kEnabled); + ASSERT_EQ(hns_support, HNSSupport::kEnabled); } else { - ASSERT_EQ(hns_support, internal::HNSSupport::kDisabled); + ASSERT_EQ(hns_support, HNSSupport::kDisabled); } } @@ -643,13 +646,13 @@ void TestAzureFileSystem::TestDetectHierarchicalNamespaceOnMissingContainer() { EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv()); switch (env->backend()) { case AzureBackend::kAzurite: - ASSERT_EQ(hns_support, internal::HNSSupport::kDisabled); + ASSERT_EQ(hns_support, HNSSupport::kDisabled); break; case AzureBackend::kAzure: if (env->WithHierarchicalNamespace()) { - ASSERT_EQ(hns_support, internal::HNSSupport::kContainerNotFound); + 
ASSERT_EQ(hns_support, HNSSupport::kContainerNotFound); } else { - ASSERT_EQ(hns_support, internal::HNSSupport::kDisabled); + ASSERT_EQ(hns_support, HNSSupport::kDisabled); } break; } } From 37616a8da57e4d98c82e8213ba1999cff4354334 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diego=20Fern=C3=A1ndez=20Giraldo?= Date: Wed, 20 Dec 2023 13:14:11 -0700 Subject: [PATCH 081/570] GH-39328: [Java] Make default getConsumer public (#39329) ### Rationale for this change This can be useful for people implementing their own getConsumer. ### What changes are included in this PR? Make the default getConsumer public. ### Are these changes tested? N/A ### Are there any user-facing changes? Users can now call JdbcToArrowUtils.getConsumer. * Closes: #39328 Authored-by: Diego Fernandez Signed-off-by: David Li --- .../apache/arrow/adapter/jdbc/JdbcToArrowUtils.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java index f8a13b93b1ed8..b66e133785f42 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java @@ -430,7 +430,18 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcT } } - static JdbcConsumer getConsumer(ArrowType arrowType, int columnIndex, boolean nullable, + /** + * Default function used for JdbcConsumerFactory. This function gets a JdbcConsumer for the + * given column based on the Arrow type and provided vector. + * + * @param arrowType Arrow type for the column. + * @param columnIndex Column index to fetch from the ResultSet + * @param nullable Whether the value is nullable or not + * @param vector Vector to store the consumed value + * @param config Associated JdbcToArrowConfig, used mainly for the Calendar. + * @return {@link JdbcConsumer} + */ + public static JdbcConsumer getConsumer(ArrowType arrowType, int columnIndex, boolean nullable, FieldVector vector, JdbcToArrowConfig config) { final Calendar calendar = config.getCalendar(); From 3c66491846a24f17014b31a22fafdda0229f881a Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Thu, 21 Dec 2023 00:29:47 +0000 Subject: [PATCH 082/570] GH-39318: [C++][FS][Azure] Add workload identity auth configuration (#39319) ### Rationale for this change Workload identity is a useful Azure authentication method. ### What changes are included in this PR? Implement `AzureOptions::ConfigureWorkloadIdentityCredential` ### Are these changes tested? Added a simple test initialising a filesystem using `ConfigureWorkloadIdentityCredential`. This is not the most comprehensive test but it's the same as what we agreed on for https://github.com/apache/arrow/pull/39263. ### Are there any user-facing changes? Workload identity authentication is now supported. 
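For illustration, a minimal C++ sketch of how the new option might be wired up (not part of this patch): the account, container and blob names are placeholders, and it is assumed that the Azure SDK resolves the workload identity from the usual environment variables (`AZURE_TENANT_ID`, `AZURE_CLIENT_ID`, `AZURE_FEDERATED_TOKEN_FILE`).

```cpp
// Sketch only: exercises AzureOptions::ConfigureWorkloadIdentityCredential()
// and AzureFileSystem::Make() as added/used in this patch.
#include <iostream>

#include "arrow/filesystem/azurefs.h"
#include "arrow/result.h"
#include "arrow/status.h"

arrow::Status UseWorkloadIdentity() {
  arrow::fs::AzureOptions options;
  // Placeholder account name; the credential itself is picked up from the
  // workload identity environment by the Azure SDK.
  ARROW_RETURN_NOT_OK(
      options.ConfigureWorkloadIdentityCredential("my-storage-account"));
  ARROW_ASSIGN_OR_RAISE(auto fs, arrow::fs::AzureFileSystem::Make(options));
  ARROW_ASSIGN_OR_RAISE(auto info, fs->GetFileInfo("my-container/some/blob.txt"));
  std::cout << info.path() << " is "
            << (info.type() == arrow::fs::FileType::File ? "a file" : "not a file")
            << std::endl;
  return arrow::Status::OK();
}
```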
* Closes: #39318 Authored-by: Thomas Newton Signed-off-by: Sutou Kouhei --- cpp/src/arrow/filesystem/azurefs.cc | 7 +++++++ cpp/src/arrow/filesystem/azurefs.h | 2 ++ cpp/src/arrow/filesystem/azurefs_test.cc | 8 +++++++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index a9795e40a6ce8..d72ead92ed111 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -119,6 +119,13 @@ Status AzureOptions::ConfigureDefaultCredential(const std::string& account_name) return Status::OK(); } +Status AzureOptions::ConfigureWorkloadIdentityCredential( + const std::string& account_name) { + credential_kind_ = CredentialKind::kTokenCredential; + token_credential_ = std::make_shared(); + return Status::OK(); +} + Result> AzureOptions::MakeBlobServiceClient() const { switch (credential_kind_) { diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 0c41c42928121..be3ca5ba238ae 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -103,6 +103,8 @@ struct ARROW_EXPORT AzureOptions { Status ConfigureDefaultCredential(const std::string& account_name); + Status ConfigureWorkloadIdentityCredential(const std::string& account_name); + Status ConfigureAccountKeyCredential(const std::string& account_name, const std::string& account_key); diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 53e71f3658dd9..ecf7522b98eef 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -275,7 +275,13 @@ class AzureHierarchicalNSEnv : public AzureEnvImpl { TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) { AzureOptions options; ARROW_EXPECT_OK(options.ConfigureDefaultCredential("dummy-account-name")); - EXPECT_OK_AND_ASSIGN(auto default_credential_fs, AzureFileSystem::Make(options)); + EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); +} + +TEST(AzureFileSystem, InitializeFilesystemWithWorkloadIdentityCredential) { + AzureOptions options; + ARROW_EXPECT_OK(options.ConfigureWorkloadIdentityCredential("dummy-account-name")); + EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); } TEST(AzureFileSystem, OptionsCompare) { From 5df541de94b4cf76a8b9a1cad6155ae781ea55dc Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 21 Dec 2023 18:52:36 +0900 Subject: [PATCH 083/570] GH-39333: [C++] Don't use "if constexpr" in lambda (#39334) ### Rationale for this change It seems that it's not portable. 
At least it doesn't work as expected with Visual Studio 2017: C:/arrow/cpp/src/arrow/array/array_nested.cc(291): error C2065: 'validity': undeclared identifier (compiling source file C:\arrow-build\src\arrow\CMakeFiles\arrow_shared.dir\Unity\unity_0_cxx.cxx) [C:\arrow-build\src\arrow\arrow_shared.vcxproj] C:/arrow/cpp/src/arrow/array/array_nested.cc(660): note: see reference to function template instantiation 'arrow::Result> arrow::`anonymous-namespace'::FlattenListViewArray(const ListViewArrayT &,arrow::MemoryPool *)' being compiled with [ ListViewArrayT=arrow::ListViewArray ] (compiling source file C:\arrow-build\src\arrow\CMakeFiles\arrow_shared.dir\Unity\unity_0_cxx.cxx) memory_pool.cc C:/arrow/cpp/src/arrow/array/array_nested.cc(291): error C2065: 'list_view_array_offset': undeclared identifier (compiling source file C:\arrow-build\src\arrow\CMakeFiles\arrow_shared.dir\Unity\unity_0_cxx.cxx) [C:\arrow-build\src\arrow\arrow_shared.vcxproj] ### What changes are included in this PR? Avoid "if constexpr" in lambda. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * Closes: #39333 Lead-authored-by: Antoine Pitrou Co-authored-by: Sutou Kouhei Signed-off-by: Antoine Pitrou --- cpp/src/arrow/array/array_nested.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/array/array_nested.cc b/cpp/src/arrow/array/array_nested.cc index 03f3e5af29908..acdd0a0742468 100644 --- a/cpp/src/arrow/array/array_nested.cc +++ b/cpp/src/arrow/array/array_nested.cc @@ -287,10 +287,8 @@ Result> FlattenListViewArray(const ListViewArrayT& list_v const auto* sizes = list_view_array.data()->template GetValues(2); auto is_null_or_empty = [&](int64_t i) { - if constexpr (HasNulls) { - if (!bit_util::GetBit(validity, list_view_array_offset + i)) { - return true; - } + if (HasNulls && !bit_util::GetBit(validity, list_view_array_offset + i)) { + return true; } return sizes[i] == 0; }; From 2308cdfeaedf1af062c25d6edb2eeb1606fb105e Mon Sep 17 00:00:00 2001 From: "Rossi(Ruoxi) Sun" Date: Thu, 21 Dec 2023 01:55:47 -0800 Subject: [PATCH 084/570] GH-15192: [C++] Bring back `case_when` tests for union types (#39308) ### Rationale for this change Bring back the problematic test case of random `case_when` on union(bool, string) type. This case used to fail. However #36018 already addressed the issue. More information about how it used to fail, please refer to https://github.com/apache/arrow/issues/15192#issuecomment-1862252174. ### What changes are included in this PR? Bring back the test code. ### Are these changes tested? Yes, the change is the test. ### Are there any user-facing changes? No. 
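To make the calling convention concrete, here is a hypothetical sketch of invoking `case_when` through the generic compute entry point. It deliberately uses plain string value columns instead of the union(bool, string) type covered by the re-enabled test, and it leans on the `arrow::ArrayFromJSON` helper from the testing library (link against `arrow_testing`) purely for brevity.

```cpp
// Sketch only: the first argument is a struct of boolean condition columns,
// followed by one value column per condition (and an optional "else" column).
#include <iostream>

#include "arrow/api.h"
#include "arrow/compute/api.h"
#include "arrow/testing/gtest_util.h"  // arrow::ArrayFromJSON

arrow::Status RunCaseWhen() {
  auto conditions = arrow::ArrayFromJSON(
      arrow::struct_({arrow::field("a", arrow::boolean()),
                      arrow::field("b", arrow::boolean())}),
      R"([{"a": true, "b": false}, {"a": false, "b": true}, {"a": false, "b": false}])");
  auto if_a = arrow::ArrayFromJSON(arrow::utf8(), R"(["a0", "a1", "a2"])");
  auto if_b = arrow::ArrayFromJSON(arrow::utf8(), R"(["b0", "b1", "b2"])");
  // No "else" column is passed, so rows where no condition matches become null.
  ARROW_ASSIGN_OR_RAISE(
      arrow::Datum result,
      arrow::compute::CallFunction("case_when", {conditions, if_a, if_b}));
  std::cout << result.make_array()->ToString() << std::endl;  // ["a0", "b1", null]
  return arrow::Status::OK();
}
```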
* Closes: #15192 Authored-by: zanmato Signed-off-by: Antoine Pitrou --- .../compute/kernels/scalar_if_else_test.cc | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index a11aab81742ed..771261cac9140 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -2485,16 +2485,14 @@ TEST(TestCaseWhen, UnionBoolString) { } } -// FIXME(GH-15192): enabling this test produces test failures - -// TEST(TestCaseWhen, UnionBoolStringRandom) { -// for (const auto& type : std::vector>{ -// sparse_union({field("a", boolean()), field("b", utf8())}, {2, 7}), -// dense_union({field("a", boolean()), field("b", utf8())}, {2, 7})}) { -// ARROW_SCOPED_TRACE(type->ToString()); -// TestCaseWhenRandom(type); -// } -// } +TEST(TestCaseWhen, UnionBoolStringRandom) { + for (const auto& type : std::vector>{ + sparse_union({field("a", boolean()), field("b", utf8())}, {2, 7}), + dense_union({field("a", boolean()), field("b", utf8())}, {2, 7})}) { + ARROW_SCOPED_TRACE(type->ToString()); + TestCaseWhenRandom(type); + } +} TEST(TestCaseWhen, DispatchBest) { CheckDispatchBest("case_when", {struct_({field("", boolean())}), int64(), int32()}, From 596259ee47b5c675b71432743d9bfd196efe08e3 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Thu, 21 Dec 2023 19:02:38 +0530 Subject: [PATCH 085/570] GH-38725: [Java] decompression in Lz4CompressionCodec.java does not set writer index (#38840) ### Rationale for this change Unlike its counterpart in `ZstdCompressionCodec`, the `doDecompress` function in `Lz4CompressionCodec` does not set the writer index on the decompressed buffer. This PR fixes that issue. ### What changes are included in this PR? Writes the index for the decompressed ArrowBuf. ### Are these changes tested? No ### Are there any user-facing changes? 
No * Closes: #38725 Lead-authored-by: Vibhatha Lakmal Abeykoon Co-authored-by: vibhatha Signed-off-by: David Li --- .../org/apache/arrow/compression/Lz4CompressionCodec.java | 1 + .../apache/arrow/compression/TestCompressionCodec.java | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java b/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java index daa35b7e15be6..e8b780638e2c1 100644 --- a/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java +++ b/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java @@ -79,6 +79,7 @@ protected ArrowBuf doDecompress(BufferAllocator allocator, ArrowBuf compressedBu byte[] outBytes = out.toByteArray(); ArrowBuf decompressedBuffer = allocator.buffer(outBytes.length); decompressedBuffer.setBytes(/*index=*/0, outBytes); + decompressedBuffer.writerIndex(decompressedLength); return decompressedBuffer; } diff --git a/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java b/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java index 403130edba52e..01156fa2b0e0b 100644 --- a/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java +++ b/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java @@ -117,6 +117,12 @@ private List deCompressBuffers(CompressionCodec codec, List return outputBuffers; } + private void assertWriterIndex(List decompressedBuffers) { + for (ArrowBuf decompressedBuf : decompressedBuffers) { + assertTrue(decompressedBuf.writerIndex() > 0); + } + } + @ParameterizedTest @MethodSource("codecs") void testCompressFixedWidthBuffers(int vectorLength, CompressionCodec codec) throws Exception { @@ -139,6 +145,7 @@ void testCompressFixedWidthBuffers(int vectorLength, CompressionCodec codec) thr List decompressedBuffers = deCompressBuffers(codec, compressedBuffers); assertEquals(2, decompressedBuffers.size()); + assertWriterIndex(decompressedBuffers); // orchestrate new vector IntVector newVec = new IntVector("new vec", allocator); @@ -180,6 +187,7 @@ void testCompressVariableWidthBuffers(int vectorLength, CompressionCodec codec) List decompressedBuffers = deCompressBuffers(codec, compressedBuffers); assertEquals(3, decompressedBuffers.size()); + assertWriterIndex(decompressedBuffers); // orchestrate new vector VarCharVector newVec = new VarCharVector("new vec", allocator); From 2f9f892a0075d990a1b42dc97a97d490b6b08345 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 21 Dec 2023 15:53:41 +0100 Subject: [PATCH 086/570] GH-39196: [Python][Docs] Document the Arrow PyCapsule protocol in the 'extending pyarrow' section of the Python docs (#39199) ### Rationale for this change While the Arrow PyCapsule protocol itself is defined in the specification part of the docs, this PR adds a section about it in the Python user guide as well (referring to the specification for most details), where users might typically look for Python specific docs. 
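The capsule protocol itself is Python-specific, but the payload of those capsules is the plain Arrow C Data Interface structs. As a rough C++ sketch of the underlying export/import handshake that the Python layer wraps (illustrative only, not part of this documentation change):

```cpp
// Sketch only: produce an array, hand it across the C Data Interface, and
// import it back, which is what the Python capsules carry under the hood.
#include <iostream>

#include "arrow/api.h"
#include "arrow/c/abi.h"
#include "arrow/c/bridge.h"

arrow::Status RoundTripThroughCDataInterface() {
  arrow::Int64Builder builder;
  ARROW_RETURN_NOT_OK(builder.AppendValues({1, 2, 3}));
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> array, builder.Finish());

  // Export moves ownership into the two C structs; a Python producer would
  // expose pointers like these through "arrow_schema"/"arrow_array" capsules.
  struct ArrowArray c_array;
  struct ArrowSchema c_schema;
  ARROW_RETURN_NOT_OK(arrow::ExportArray(*array, &c_array, &c_schema));

  // A consumer (PyArrow or any other implementation) imports and takes over.
  ARROW_ASSIGN_OR_RAISE(auto imported, arrow::ImportArray(&c_array, &c_schema));
  std::cout << imported->ToString() << std::endl;
  return arrow::Status::OK();
}
```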
* Closes: #39196 Lead-authored-by: Joris Van den Bossche Co-authored-by: Antoine Pitrou Signed-off-by: Joris Van den Bossche --- .../CDataInterface/PyCapsuleInterface.rst | 2 ++ docs/source/python/extending_types.rst | 32 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/docs/source/format/CDataInterface/PyCapsuleInterface.rst b/docs/source/format/CDataInterface/PyCapsuleInterface.rst index 0c1a01d7c6778..03095aa2e9356 100644 --- a/docs/source/format/CDataInterface/PyCapsuleInterface.rst +++ b/docs/source/format/CDataInterface/PyCapsuleInterface.rst @@ -16,6 +16,8 @@ .. under the License. +.. _arrow-pycapsule-interface: + ============================= The Arrow PyCapsule Interface ============================= diff --git a/docs/source/python/extending_types.rst b/docs/source/python/extending_types.rst index ee92cebcb549c..b7261005e66ee 100644 --- a/docs/source/python/extending_types.rst +++ b/docs/source/python/extending_types.rst @@ -21,6 +21,38 @@ Extending pyarrow ================= +Controlling conversion to (Py)Arrow with the PyCapsule Interface +---------------------------------------------------------------- + +The :ref:`Arrow C data interface ` allows moving Arrow data between +different implementations of Arrow. This is a generic, cross-language interface not +specific to Python, but for Python libraries this interface is extended with a Python +specific layer: :ref:`arrow-pycapsule-interface`. + +This Python interface ensures that different libraries that support the C Data interface +can export Arrow data structures in a standard way and recognize each other's objects. + +If you have a Python library providing data structures that hold Arrow-compatible data +under the hood, you can implement the following methods on those objects: + +- ``__arrow_c_schema__`` for schema or type-like objects. +- ``__arrow_c_array__`` for arrays and record batches (contiguous tables). +- ``__arrow_c_stream__`` for chunked tables or streams of data. + +Those methods return `PyCapsule `__ +objects, and more details on the exact semantics can be found in the +:ref:`specification `. + +When your data structures have those methods defined, the PyArrow constructors +(such as :func:`pyarrow.array` or :func:`pyarrow.table`) will recognize those objects as +supporting this protocol, and convert them to PyArrow data structures zero-copy. And the +same can be true for any other library supporting this protocol on ingesting data. + +Similarly, if your library has functions that accept user-provided data, you can add +support for this protocol by checking for the presence of those methods, and +therefore accept any Arrow data (instead of harcoding support for a specific +Arrow producer such as PyArrow). + .. _arrow_array_protocol: Controlling conversion to pyarrow.Array with the ``__arrow_array__`` protocol From 535b925bf073fb1af4e6e23ab54027f30dc8751f Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Fri, 22 Dec 2023 01:34:06 +0800 Subject: [PATCH 087/570] GH-39232: [C++] Support binary to fixed_size_binary cast (#39236) ### Rationale for this change Add binary to fixed_size_binary cast. ### What changes are included in this PR? Add binary to fixed_size_binary cast. ### Are these changes tested? Yes ### Are there any user-facing changes? 
No * Closes: #39232 Authored-by: Jin Shang Signed-off-by: Antoine Pitrou --- .../compute/kernels/scalar_cast_string.cc | 61 ++++++++++++++++--- .../arrow/compute/kernels/scalar_cast_test.cc | 16 +++++ 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc index ebeb597207a81..a6576e4e4c26f 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc @@ -20,11 +20,14 @@ #include "arrow/array/array_base.h" #include "arrow/array/builder_binary.h" +#include "arrow/compute/kernels/base_arithmetic_internal.h" #include "arrow/compute/kernels/codegen_internal.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/compute/kernels/scalar_cast_internal.h" #include "arrow/compute/kernels/temporal_internal.h" #include "arrow/result.h" +#include "arrow/type.h" +#include "arrow/type_traits.h" #include "arrow/util/formatting.h" #include "arrow/util/int_util.h" #include "arrow/util/utf8_internal.h" @@ -284,9 +287,8 @@ Status CastBinaryToBinaryOffsets(KernelContext* ctx, } template -enable_if_base_binary BinaryToBinaryCastExec(KernelContext* ctx, - const ExecSpan& batch, - ExecResult* out) { +enable_if_t::value && !is_fixed_size_binary_type::value, Status> +BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { const CastOptions& options = checked_cast(*ctx->state()).options; const ArraySpan& input = batch[0].array; @@ -387,6 +389,33 @@ BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* ou return ZeroCopyCastExec(ctx, batch, out); } +template +enable_if_t::value && std::is_same::value, + Status> +BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + const CastOptions& options = checked_cast(*ctx->state()).options; + FixedSizeBinaryBuilder builder(options.to_type.GetSharedPtr(), ctx->memory_pool()); + const ArraySpan& input = batch[0].array; + RETURN_NOT_OK(builder.Reserve(input.length)); + + RETURN_NOT_OK(VisitArraySpanInline( + input, + [&](std::string_view v) { + if (v.size() != static_cast(builder.byte_width())) { + return Status::Invalid("Failed casting from ", input.type->ToString(), " to ", + options.to_type.ToString(), ": widths must match"); + } + builder.UnsafeAppend(v); + return Status::OK(); + }, + [&] { + builder.UnsafeAppendNull(); + return Status::OK(); + })); + + return builder.FinishInternal(&std::get>(out->value)); +} + #if defined(_MSC_VER) #pragma warning(pop) #endif @@ -452,6 +481,26 @@ void AddBinaryToBinaryCast(CastFunction* func) { AddBinaryToBinaryCast(func); } +template +void AddBinaryToFixedSizeBinaryCast(CastFunction* func) { + auto resolver_fsb = [](KernelContext* ctx, const std::vector&) { + const CastOptions& options = checked_cast(*ctx->state()).options; + return options.to_type; + }; + + DCHECK_OK(func->AddKernel(InType::type_id, {InputType(InType::type_id)}, resolver_fsb, + BinaryToBinaryCastExec, + NullHandling::COMPUTED_NO_PREALLOCATE)); +} + +void AddBinaryToFixedSizeBinaryCast(CastFunction* func) { + AddBinaryToFixedSizeBinaryCast(func); + AddBinaryToFixedSizeBinaryCast(func); + AddBinaryToFixedSizeBinaryCast(func); + AddBinaryToFixedSizeBinaryCast(func); + AddBinaryToFixedSizeBinaryCast(func); +} + } // namespace std::vector> GetBinaryLikeCasts() { @@ -483,11 +532,7 @@ std::vector> GetBinaryLikeCasts() { std::make_shared("cast_fixed_size_binary", Type::FIXED_SIZE_BINARY); 
AddCommonCasts(Type::FIXED_SIZE_BINARY, OutputType(ResolveOutputFromOptions), cast_fsb.get()); - DCHECK_OK(cast_fsb->AddKernel( - Type::FIXED_SIZE_BINARY, {InputType(Type::FIXED_SIZE_BINARY)}, - OutputType(FirstType), - BinaryToBinaryCastExec, - NullHandling::COMPUTED_NO_PREALLOCATE)); + AddBinaryToFixedSizeBinaryCast(cast_fsb.get()); return {cast_binary, cast_large_binary, cast_string, cast_large_string, cast_fsb}; } diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index c84125bbdd19e..b429c8175b020 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2171,6 +2171,22 @@ TEST(Cast, StringToString) { } } +TEST(Cast, BinaryOrStringToFixedSizeBinary) { + for (auto in_type : {utf8(), large_utf8(), binary(), large_binary()}) { + auto valid_input = ArrayFromJSON(in_type, R"(["foo", null, "bar", "baz", "quu"])"); + auto invalid_input = ArrayFromJSON(in_type, R"(["foo", null, "bar", "baz", "quux"])"); + + CheckCast(valid_input, ArrayFromJSON(fixed_size_binary(3), R"(["foo", null, "bar", + "baz", "quu"])")); + CheckCastFails(invalid_input, CastOptions::Safe(fixed_size_binary(3))); + CheckCastFails(valid_input, CastOptions::Safe(fixed_size_binary(5))); + + auto empty_input = ArrayFromJSON(in_type, "[]"); + CheckCast(empty_input, ArrayFromJSON(fixed_size_binary(3), "[]")); + CheckCast(empty_input, ArrayFromJSON(fixed_size_binary(5), "[]")); + } +} + TEST(Cast, IntToString) { for (auto string_type : {utf8(), large_utf8()}) { CheckCast(ArrayFromJSON(int8(), "[0, 1, 127, -128, null]"), From e5145bff901778360f6faba3be27efa3d9522976 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 21 Dec 2023 15:00:22 -0300 Subject: [PATCH 088/570] GH-39339: [C++] Add ForceCachedHierarchicalNamespaceSupport to help with testing (#39340) ### Rationale for this change This ensures all the branches in the `AzureFileSystem` code operations are tested. For instance, many operations executed on a missing container, wouldn't get a `HNSSupport::kContainerNotFound` error if the cached `HNSSupport` was already known due to a previous operation that cached the `HNSSupport` value. ### What changes are included in this PR? Introduction of the helper that overrides `cached_hns_support_` and enumeration of the scenarios. ### Are these changes tested? Yes. This is a test improvement PR. * Closes: #39339 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/filesystem/azurefs.cc | 36 +- cpp/src/arrow/filesystem/azurefs.h | 5 + cpp/src/arrow/filesystem/azurefs_test.cc | 453 +++++++++++++---------- 3 files changed, 291 insertions(+), 203 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index d72ead92ed111..27bdb5092a3ea 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -941,14 +941,38 @@ class AzureFileSystem::Impl { break; } ARROW_ASSIGN_OR_RAISE( - cached_hns_support_, + auto hns_support, internal::CheckIfHierarchicalNamespaceIsEnabled(adlfs_client, options_)); - DCHECK_NE(cached_hns_support_, HNSSupport::kUnknown); - // Caller should handle kContainerNotFound case appropriately. 
- return cached_hns_support_; + DCHECK_NE(hns_support, HNSSupport::kUnknown); + if (hns_support == HNSSupport::kContainerNotFound) { + // Caller should handle kContainerNotFound case appropriately as it knows the + // container this refers to, but the cached value in that case should remain + // kUnknown before we get a CheckIfHierarchicalNamespaceIsEnabled result that + // is not kContainerNotFound. + cached_hns_support_ = HNSSupport::kUnknown; + } else { + cached_hns_support_ = hns_support; + } + return hns_support; } public: + /// This is used from unit tests to ensure we perform operations on all the + /// possible states of cached_hns_support_. + void ForceCachedHierarchicalNamespaceSupport(int support) { + auto hns_support = static_cast(support); + switch (hns_support) { + case HNSSupport::kUnknown: + case HNSSupport::kContainerNotFound: + case HNSSupport::kDisabled: + case HNSSupport::kEnabled: + cached_hns_support_ = hns_support; + return; + } + // This is reachable if an invalid int is cast to enum class HNSSupport. + DCHECK(false) << "Invalid enum HierarchicalNamespaceSupport value."; + } + Result GetFileInfo(const AzureLocation& location) { if (location.container.empty()) { DCHECK(location.path.empty()); @@ -1560,6 +1584,10 @@ AzureFileSystem::AzureFileSystem(std::unique_ptr&& impl) default_async_is_sync_ = false; } +void AzureFileSystem::ForceCachedHierarchicalNamespaceSupport(int hns_support) { + impl_->ForceCachedHierarchicalNamespaceSupport(hns_support); +} + Result> AzureFileSystem::Make( const AzureOptions& options, const io::IOContext& io_context) { ARROW_ASSIGN_OR_RAISE(auto impl, AzureFileSystem::Impl::Make(options, io_context)); diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index be3ca5ba238ae..69f6295237043 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -44,6 +44,8 @@ class DataLakeServiceClient; namespace arrow::fs { +class TestAzureFileSystem; + /// Options for the AzureFileSystem implementation. struct ARROW_EXPORT AzureOptions { /// \brief hostname[:port] of the Azure Blob Storage Service. 
@@ -156,6 +158,9 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem { explicit AzureFileSystem(std::unique_ptr&& impl); + friend class TestAzureFileSystem; + void ForceCachedHierarchicalNamespaceSupport(int hns_support); + public: ~AzureFileSystem() override = default; diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index ecf7522b98eef..3266c1bfda2dc 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -62,7 +62,6 @@ namespace arrow { using internal::TemporaryDir; namespace fs { using internal::ConcatAbstractPath; -namespace { namespace bp = boost::process; using ::testing::IsEmpty; @@ -354,7 +353,7 @@ class TestAzureFileSystem : public ::testing::Test { bool set_up_succeeded_ = false; AzureOptions options_; - std::shared_ptr fs_; + std::shared_ptr fs_dont_use_directly_; // use fs() std::unique_ptr blob_service_client_; std::unique_ptr datalake_service_client_; @@ -362,6 +361,18 @@ class TestAzureFileSystem : public ::testing::Test { TestAzureFileSystem() : rng_(std::random_device()()) {} virtual Result GetAzureEnv() const = 0; + virtual HNSSupport CachedHNSSupport(const BaseAzureEnv& env) const = 0; + + FileSystem* fs(HNSSupport cached_hns_support) const { + auto* fs_ptr = fs_dont_use_directly_.get(); + fs_ptr->ForceCachedHierarchicalNamespaceSupport(static_cast(cached_hns_support)); + return fs_ptr; + } + + FileSystem* fs() const { + EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv()); + return fs(CachedHNSSupport(*env)); + } static Result MakeOptions(BaseAzureEnv* env) { AzureOptions options; @@ -395,7 +406,7 @@ class TestAzureFileSystem : public ::testing::Test { EXPECT_OK_AND_ASSIGN(options_, options_res); } - ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_)); + ASSERT_OK_AND_ASSIGN(fs_dont_use_directly_, AzureFileSystem::Make(options_)); EXPECT_OK_AND_ASSIGN(blob_service_client_, options_.MakeBlobServiceClient()); EXPECT_OK_AND_ASSIGN(datalake_service_client_, options_.MakeDataLakeServiceClient()); set_up_succeeded_ = true; @@ -435,7 +446,7 @@ class TestAzureFileSystem : public ::testing::Test { void UploadLines(const std::vector& lines, const std::string& path, int total_size) { - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(path, {})); const auto all_lines = std::accumulate(lines.begin(), lines.end(), std::string("")); ASSERT_OK(output->Write(all_lines)); ASSERT_OK(output->Close()); @@ -461,19 +472,19 @@ class TestAzureFileSystem : public ::testing::Test { const auto sub_directory_path = ConcatAbstractPath(directory_path, "new-sub"); const auto sub_blob_path = ConcatAbstractPath(sub_directory_path, "sub.txt"); const auto top_blob_path = ConcatAbstractPath(directory_path, "top.txt"); - ASSERT_OK(fs_->CreateDir(sub_directory_path, true)); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(sub_blob_path)); + ASSERT_OK(fs()->CreateDir(sub_directory_path, true)); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(sub_blob_path)); ASSERT_OK(output->Write(std::string_view("sub"))); ASSERT_OK(output->Close()); - ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(top_blob_path)); + ASSERT_OK_AND_ASSIGN(output, fs()->OpenOutputStream(top_blob_path)); ASSERT_OK(output->Write(std::string_view("top"))); ASSERT_OK(output->Close()); - AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); - AssertFileInfo(fs_.get(), directory_path, FileType::Directory); - 
AssertFileInfo(fs_.get(), sub_directory_path, FileType::Directory); - AssertFileInfo(fs_.get(), sub_blob_path, FileType::File); - AssertFileInfo(fs_.get(), top_blob_path, FileType::File); + AssertFileInfo(fs(), data.container_name, FileType::Directory); + AssertFileInfo(fs(), directory_path, FileType::Directory); + AssertFileInfo(fs(), sub_directory_path, FileType::Directory); + AssertFileInfo(fs(), sub_blob_path, FileType::File); + AssertFileInfo(fs(), top_blob_path, FileType::File); paths->container = data.container_name; paths->directory = directory_path; @@ -538,52 +549,52 @@ class TestAzureFileSystem : public ::testing::Test { const auto directory_path = data.RandomDirectoryPath(rng_); if (WithHierarchicalNamespace()) { - ASSERT_OK(fs_->CreateDir(directory_path, true)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(directory_path, true)); + AssertFileInfo(fs(), directory_path, FileType::Directory); + ASSERT_OK(fs()->DeleteDir(directory_path)); + AssertFileInfo(fs(), directory_path, FileType::NotFound); } else { // There is only virtual directory without hierarchical namespace // support. So the CreateDir() and DeleteDir() do nothing. - ASSERT_OK(fs_->CreateDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(directory_path)); + AssertFileInfo(fs(), directory_path, FileType::NotFound); + ASSERT_OK(fs()->DeleteDir(directory_path)); + AssertFileInfo(fs(), directory_path, FileType::NotFound); } } void TestCreateDirSuccessContainerAndDirectory() { auto data = SetUpPreexistingData(); const auto path = data.RandomDirectoryPath(rng_); - ASSERT_OK(fs_->CreateDir(path, false)); + ASSERT_OK(fs()->CreateDir(path, false)); if (WithHierarchicalNamespace()) { - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + AssertFileInfo(fs(), path, FileType::Directory); } else { // There is only virtual directory without hierarchical namespace // support. So the CreateDir() does nothing. - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + AssertFileInfo(fs(), path, FileType::NotFound); } } void TestCreateDirRecursiveSuccessContainerOnly() { auto container_name = PreexistingData::RandomContainerName(rng_); - ASSERT_OK(fs_->CreateDir(container_name, true)); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); + ASSERT_OK(fs()->CreateDir(container_name, true)); + AssertFileInfo(fs(), container_name, FileType::Directory); } void TestCreateDirRecursiveSuccessDirectoryOnly() { auto data = SetUpPreexistingData(); const auto parent = data.RandomDirectoryPath(rng_); const auto path = ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); + ASSERT_OK(fs()->CreateDir(path, true)); if (WithHierarchicalNamespace()) { - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); + AssertFileInfo(fs(), path, FileType::Directory); + AssertFileInfo(fs(), parent, FileType::Directory); } else { // There is only virtual directory without hierarchical namespace // support. So the CreateDir() does nothing. 
- arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); + AssertFileInfo(fs(), path, FileType::NotFound); + AssertFileInfo(fs(), parent, FileType::NotFound); } } @@ -591,31 +602,31 @@ class TestAzureFileSystem : public ::testing::Test { auto data = SetUpPreexistingData(); const auto parent = data.RandomDirectoryPath(rng_); const auto path = ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); + ASSERT_OK(fs()->CreateDir(path, true)); if (WithHierarchicalNamespace()) { - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); + AssertFileInfo(fs(), path, FileType::Directory); + AssertFileInfo(fs(), parent, FileType::Directory); + AssertFileInfo(fs(), data.container_name, FileType::Directory); } else { // There is only virtual directory without hierarchical namespace // support. So the CreateDir() does nothing. - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); + AssertFileInfo(fs(), path, FileType::NotFound); + AssertFileInfo(fs(), parent, FileType::NotFound); + AssertFileInfo(fs(), data.container_name, FileType::Directory); } } void TestDeleteDirContentsSuccessNonexistent() { auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); - ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + ASSERT_OK(fs()->DeleteDirContents(directory_path, true)); + AssertFileInfo(fs(), directory_path, FileType::NotFound); } void TestDeleteDirContentsFailureNonexistent() { auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); - ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); + ASSERT_RAISES(IOError, fs()->DeleteDirContents(directory_path, false)); } }; @@ -672,12 +683,12 @@ void TestAzureFileSystem::TestGetFileInfoObject() { .GetProperties() .Value; - AssertFileInfo(fs_.get(), data.ObjectPath(), FileType::File, + AssertFileInfo(fs(), data.ObjectPath(), FileType::File, std::chrono::system_clock::time_point{object_properties.LastModified}, static_cast(object_properties.BlobSize)); // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + std::string{data.kObjectName})); + ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://" + std::string{data.kObjectName})); } void TestAzureFileSystem::TestGetFileInfoObjectWithNestedStructure() { @@ -685,37 +696,37 @@ void TestAzureFileSystem::TestGetFileInfoObjectWithNestedStructure() { // Adds detailed tests to handle cases of different edge cases // with directory naming conventions (e.g. with and without slashes). const std::string kObjectName = "test-object-dir/some_other_dir/another_dir/foo"; - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(data.ContainerPath(kObjectName), - /*metadata=*/{})); + ASSERT_OK_AND_ASSIGN(auto output, + fs()->OpenOutputStream(data.ContainerPath(kObjectName), + /*metadata=*/{})); const std::string_view lorem_ipsum(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); // 0 is immediately after "/" lexicographically, ensure that this doesn't // cause unexpected issues. 
- ASSERT_OK_AND_ASSIGN( - output, fs_->OpenOutputStream(data.ContainerPath("test-object-dir/some_other_dir0"), - /*metadata=*/{})); + ASSERT_OK_AND_ASSIGN(output, fs()->OpenOutputStream( + data.ContainerPath("test-object-dir/some_other_dir0"), + /*metadata=*/{})); ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); ASSERT_OK_AND_ASSIGN(output, - fs_->OpenOutputStream(data.ContainerPath(kObjectName + "0"), - /*metadata=*/{})); + fs()->OpenOutputStream(data.ContainerPath(kObjectName + "0"), + /*metadata=*/{})); ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); - AssertFileInfo(fs_.get(), data.ContainerPath(kObjectName), FileType::File); - AssertFileInfo(fs_.get(), data.ContainerPath(kObjectName) + "/", FileType::NotFound); - AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir"), FileType::Directory); - AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir") + "/", - FileType::Directory); - AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_dir"), + AssertFileInfo(fs(), data.ContainerPath(kObjectName), FileType::File); + AssertFileInfo(fs(), data.ContainerPath(kObjectName) + "/", FileType::NotFound); + AssertFileInfo(fs(), data.ContainerPath("test-object-dir"), FileType::Directory); + AssertFileInfo(fs(), data.ContainerPath("test-object-dir") + "/", FileType::Directory); + AssertFileInfo(fs(), data.ContainerPath("test-object-dir/some_other_dir"), FileType::Directory); - AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_dir") + "/", + AssertFileInfo(fs(), data.ContainerPath("test-object-dir/some_other_dir") + "/", FileType::Directory); - AssertFileInfo(fs_.get(), data.ContainerPath("test-object-di"), FileType::NotFound); - AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_di"), + AssertFileInfo(fs(), data.ContainerPath("test-object-di"), FileType::NotFound); + AssertFileInfo(fs(), data.ContainerPath("test-object-dir/some_other_di"), FileType::NotFound); if (WithHierarchicalNamespace()) { @@ -723,17 +734,45 @@ void TestAzureFileSystem::TestGetFileInfoObjectWithNestedStructure() { .GetDirectoryClient("test-empty-object-dir") .Create(); - AssertFileInfo(fs_.get(), data.ContainerPath("test-empty-object-dir"), + AssertFileInfo(fs(), data.ContainerPath("test-empty-object-dir"), FileType::Directory); } } -template +template +struct TestingScenario { + using AzureEnvClass = AzureEnv; + static constexpr bool kHNSSupportShouldBeKnown = HNSSupportShouldBeKnown; +}; + +template class AzureFileSystemTestImpl : public TestAzureFileSystem { public: + using AzureEnvClass = typename TestingScenario::AzureEnvClass; + using TestAzureFileSystem::TestAzureFileSystem; Result GetAzureEnv() const final { return AzureEnvClass::GetInstance(); } + + /// \brief HNSSupport value that should be assumed as the cached + /// HNSSupport on every fs()->Operation(...) call in tests. + /// + /// If TestingScenario::kHNSSupportShouldBeKnown is true, this value + /// will be HNSSupport::kEnabled or HNSSupport::kDisabled, depending + /// on the environment. Otherwise, this value will be HNSSupport::kUnknown. + /// + /// This ensures all the branches in the AzureFileSystem code operations are tested. + /// For instance, many operations executed on a missing container, wouldn't + /// get a HNSSupport::kContainerNotFound error if the cached HNSSupport was + /// already known due to a previous operation that cached the HNSSupport value. 
+ HNSSupport CachedHNSSupport(const BaseAzureEnv& env) const final { + if constexpr (TestingScenario::kHNSSupportShouldBeKnown) { + return env.WithHierarchicalNamespace() ? HNSSupport::kEnabled + : HNSSupport::kDisabled; + } else { + return HNSSupport::kUnknown; + } + } }; // How to enable the non-Azurite tests: @@ -762,54 +801,71 @@ class AzureFileSystemTestImpl : public TestAzureFileSystem { // [1]: https://azure.microsoft.com/en-gb/free/ // [2]: // https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account -using TestAzureFlatNSFileSystem = AzureFileSystemTestImpl; -using TestAzureHierarchicalNSFileSystem = AzureFileSystemTestImpl; -using TestAzuriteFileSystem = AzureFileSystemTestImpl; +using TestAzureFlatNSFileSystem = + AzureFileSystemTestImpl>; +using TestAzureHierarchicalNSFileSystem = + AzureFileSystemTestImpl>; +using TestAzuriteFileSystem = AzureFileSystemTestImpl>; -// Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/ HNS) - -template -using AzureFileSystemTestOnAllEnvs = AzureFileSystemTestImpl; +// Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/ HNS). +template +using TestAzureFileSystemOnAllEnvs = AzureFileSystemTestImpl; using AllEnvironments = - ::testing::Types; + ::testing::Types, TestingScenario, + TestingScenario>; -TYPED_TEST_SUITE(AzureFileSystemTestOnAllEnvs, AllEnvironments); +TYPED_TEST_SUITE(TestAzureFileSystemOnAllEnvs, AllEnvironments); -TYPED_TEST(AzureFileSystemTestOnAllEnvs, DetectHierarchicalNamespace) { +TYPED_TEST(TestAzureFileSystemOnAllEnvs, DetectHierarchicalNamespace) { this->TestDetectHierarchicalNamespace(true); this->TestDetectHierarchicalNamespace(false); } -TYPED_TEST(AzureFileSystemTestOnAllEnvs, DetectHierarchicalNamespaceOnMissingContainer) { +TYPED_TEST(TestAzureFileSystemOnAllEnvs, DetectHierarchicalNamespaceOnMissingContainer) { this->TestDetectHierarchicalNamespaceOnMissingContainer(); } -TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObject) { +// Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/ HNS) +// combined with the two scenarios for AzureFileSystem::cached_hns_support_ -- unknown and +// known according to the environment. 
+template +using TestAzureFileSystemOnAllScenarios = AzureFileSystemTestImpl; + +using AllScenarios = ::testing::Types< + TestingScenario, TestingScenario, + TestingScenario, TestingScenario, + TestingScenario, + TestingScenario>; + +TYPED_TEST_SUITE(TestAzureFileSystemOnAllScenarios, AllScenarios); + +TYPED_TEST(TestAzureFileSystemOnAllScenarios, GetFileInfoObject) { this->TestGetFileInfoObject(); } -TYPED_TEST(AzureFileSystemTestOnAllEnvs, DeleteDirSuccessEmpty) { +TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirSuccessEmpty) { this->TestDeleteDirSuccessEmpty(); } -TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObjectWithNestedStructure) { +TYPED_TEST(TestAzureFileSystemOnAllScenarios, GetFileInfoObjectWithNestedStructure) { this->TestGetFileInfoObjectWithNestedStructure(); } -TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirSuccessContainerAndDirectory) { +TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirSuccessContainerAndDirectory) { this->TestCreateDirSuccessContainerAndDirectory(); } -TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerOnly) { +TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirRecursiveSuccessContainerOnly) { this->TestCreateDirRecursiveSuccessContainerOnly(); } -TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessDirectoryOnly) { +TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirRecursiveSuccessDirectoryOnly) { this->TestCreateDirRecursiveSuccessDirectoryOnly(); } -TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerAndDirectory) { +TYPED_TEST(TestAzureFileSystemOnAllScenarios, + CreateDirRecursiveSuccessContainerAndDirectory) { this->TestCreateDirRecursiveSuccessContainerAndDirectory(); } @@ -818,41 +874,41 @@ TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerAndDi TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirFailureNonexistent) { auto data = SetUpPreexistingData(); const auto path = data.RandomDirectoryPath(rng_); - ASSERT_RAISES(IOError, fs_->DeleteDir(path)); + ASSERT_RAISES(IOError, fs()->DeleteDir(path)); } TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveBlob) { auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); const auto blob_path = ConcatAbstractPath(directory_path, "hello.txt"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(blob_path)); ASSERT_OK(output->Write(std::string_view("hello"))); ASSERT_OK(output->Close()); - arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::File); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound); + AssertFileInfo(fs(), blob_path, FileType::File); + ASSERT_OK(fs()->DeleteDir(directory_path)); + AssertFileInfo(fs(), blob_path, FileType::NotFound); } TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveDirectory) { auto data = SetUpPreexistingData(); const auto parent = data.RandomDirectoryPath(rng_); const auto path = ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); - ASSERT_OK(fs_->DeleteDir(parent)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(path, true)); + AssertFileInfo(fs(), path, FileType::Directory); + 
AssertFileInfo(fs(), parent, FileType::Directory); + ASSERT_OK(fs()->DeleteDir(parent)); + AssertFileInfo(fs(), path, FileType::NotFound); + AssertFileInfo(fs(), parent, FileType::NotFound); } TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsSuccessExist) { auto preexisting_data = SetUpPreexistingData(); HierarchicalPaths paths; CreateHierarchicalData(&paths); - ASSERT_OK(fs_->DeleteDirContents(paths.directory)); - arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::Directory); + ASSERT_OK(fs()->DeleteDirContents(paths.directory)); + AssertFileInfo(fs(), paths.directory, FileType::Directory); for (const auto& sub_path : paths.sub_paths) { - arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound); + AssertFileInfo(fs(), sub_path, FileType::NotFound); } } @@ -867,20 +923,20 @@ TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsFailureNonexistent) { // Tests using Azurite (the local Azure emulator) TEST_F(TestAzuriteFileSystem, GetFileInfoAccount) { - AssertFileInfo(fs_.get(), "", FileType::Directory); + AssertFileInfo(fs(), "", FileType::Directory); // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://")); + ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://")); } TEST_F(TestAzuriteFileSystem, GetFileInfoContainer) { auto data = SetUpPreexistingData(); - AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); + AssertFileInfo(fs(), data.container_name, FileType::Directory); - AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound); + AssertFileInfo(fs(), "nonexistent-container", FileType::NotFound); // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + data.container_name)); + ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://" + data.container_name)); } TEST_F(TestAzuriteFileSystem, GetFileInfoSelector) { @@ -891,7 +947,7 @@ TEST_F(TestAzuriteFileSystem, GetFileInfoSelector) { // Root dir select.base_dir = ""; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 2); ASSERT_EQ(infos, SortedInfos(infos)); AssertFileInfo(infos[0], "container", FileType::Directory); @@ -899,18 +955,18 @@ TEST_F(TestAzuriteFileSystem, GetFileInfoSelector) { // Empty container select.base_dir = "empty-container"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); // Nonexistent container select.base_dir = "nonexistent-container"; - ASSERT_RAISES(IOError, fs_->GetFileInfo(select)); + ASSERT_RAISES(IOError, fs()->GetFileInfo(select)); select.allow_not_found = true; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); select.allow_not_found = false; // Non-empty container select.base_dir = "container"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos, SortedInfos(infos)); ASSERT_EQ(infos.size(), 4); AssertFileInfo(infos[0], "container/emptydir", FileType::Directory); @@ -920,33 +976,33 @@ TEST_F(TestAzuriteFileSystem, GetFileInfoSelector) { // Empty "directory" select.base_dir = "container/emptydir"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); // Non-empty "directories" select.base_dir = "container/somedir"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, 
fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 1); AssertFileInfo(infos[0], "container/somedir/subdir", FileType::Directory); select.base_dir = "container/somedir/subdir"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 1); AssertFileInfo(infos[0], "container/somedir/subdir/subfile", FileType::File, 8); // Nonexistent select.base_dir = "container/nonexistent"; - ASSERT_RAISES(IOError, fs_->GetFileInfo(select)); + ASSERT_RAISES(IOError, fs()->GetFileInfo(select)); select.allow_not_found = true; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); select.allow_not_found = false; // Trailing slashes select.base_dir = "empty-container/"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); select.base_dir = "nonexistent-container/"; - ASSERT_RAISES(IOError, fs_->GetFileInfo(select)); + ASSERT_RAISES(IOError, fs()->GetFileInfo(select)); select.base_dir = "container/"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos, SortedInfos(infos)); ASSERT_EQ(infos.size(), 4); } @@ -960,19 +1016,19 @@ TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorRecursive) { std::vector infos; // Root dir select.base_dir = ""; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 12); ASSERT_EQ(infos, SortedInfos(infos)); AssertInfoAllContainersRecursive(infos); // Empty container select.base_dir = "empty-container"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); // Non-empty container select.base_dir = "container"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos, SortedInfos(infos)); ASSERT_EQ(infos.size(), 10); AssertFileInfo(infos[0], "container/emptydir", FileType::Directory); @@ -988,19 +1044,19 @@ TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorRecursive) { // Empty "directory" select.base_dir = "container/emptydir"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); // Non-empty "directories" select.base_dir = "container/somedir"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos, SortedInfos(infos)); ASSERT_EQ(infos.size(), 2); AssertFileInfo(infos[0], "container/somedir/subdir", FileType::Directory); AssertFileInfo(infos[1], "container/somedir/subdir/subfile", FileType::File, 8); select.base_dir = "container/otherdir"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos, SortedInfos(infos)); ASSERT_EQ(infos.size(), 4); AssertFileInfo(infos[0], "container/otherdir/1", FileType::Directory); @@ -1023,13 +1079,13 @@ TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorExplicitImplicitDirDedup) { FileSelector select; // non-recursive select.base_dir = "container"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 1); ASSERT_EQ(infos, SortedInfos(infos)); 
AssertFileInfo(infos[0], "container/mydir", FileType::Directory); select.base_dir = "container/mydir"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 4); ASSERT_EQ(infos, SortedInfos(infos)); AssertFileInfo(infos[0], "container/mydir/emptydir1", FileType::Directory); @@ -1038,55 +1094,55 @@ TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorExplicitImplicitDirDedup) { AssertFileInfo(infos[3], "container/mydir/nonemptydir2", FileType::Directory); select.base_dir = "container/mydir/emptydir1"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); select.base_dir = "container/mydir/emptydir2"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); select.base_dir = "container/mydir/nonemptydir1"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 1); AssertFileInfo(infos[0], "container/mydir/nonemptydir1/somefile", FileType::File); select.base_dir = "container/mydir/nonemptydir2"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 1); AssertFileInfo(infos[0], "container/mydir/nonemptydir2/somefile", FileType::File); } TEST_F(TestAzuriteFileSystem, CreateDirFailureNoContainer) { - ASSERT_RAISES(Invalid, fs_->CreateDir("", false)); + ASSERT_RAISES(Invalid, fs()->CreateDir("", false)); } TEST_F(TestAzuriteFileSystem, CreateDirSuccessContainerOnly) { auto container_name = PreexistingData::RandomContainerName(rng_); - ASSERT_OK(fs_->CreateDir(container_name, false)); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); + ASSERT_OK(fs()->CreateDir(container_name, false)); + AssertFileInfo(fs(), container_name, FileType::Directory); } TEST_F(TestAzuriteFileSystem, CreateDirFailureDirectoryWithMissingContainer) { const auto path = std::string("not-a-container/new-directory"); - ASSERT_RAISES(IOError, fs_->CreateDir(path, false)); + ASSERT_RAISES(IOError, fs()->CreateDir(path, false)); } TEST_F(TestAzuriteFileSystem, CreateDirRecursiveFailureNoContainer) { - ASSERT_RAISES(Invalid, fs_->CreateDir("", true)); + ASSERT_RAISES(Invalid, fs()->CreateDir("", true)); } TEST_F(TestAzuriteFileSystem, CreateDirUri) { ASSERT_RAISES( Invalid, - fs_->CreateDir("abfs://" + PreexistingData::RandomContainerName(rng_), true)); + fs()->CreateDir("abfs://" + PreexistingData::RandomContainerName(rng_), true)); } TEST_F(TestAzuriteFileSystem, DeleteDirSuccessContainer) { const auto container_name = PreexistingData::RandomContainerName(rng_); - ASSERT_OK(fs_->CreateDir(container_name)); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); - ASSERT_OK(fs_->DeleteDir(container_name)); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(container_name)); + AssertFileInfo(fs(), container_name, FileType::Directory); + ASSERT_OK(fs()->DeleteDir(container_name)); + AssertFileInfo(fs(), container_name, FileType::NotFound); } TEST_F(TestAzuriteFileSystem, DeleteDirSuccessNonexistent) { @@ -1094,8 +1150,8 @@ TEST_F(TestAzuriteFileSystem, DeleteDirSuccessNonexistent) { const auto directory_path = data.RandomDirectoryPath(rng_); // There is only virtual directory without hierarchical namespace // 
support. So the DeleteDir() for nonexistent directory does nothing. - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + ASSERT_OK(fs()->DeleteDir(directory_path)); + AssertFileInfo(fs(), directory_path, FileType::NotFound); } TEST_F(TestAzuriteFileSystem, DeleteDirSuccessHaveBlobs) { @@ -1110,21 +1166,21 @@ TEST_F(TestAzuriteFileSystem, DeleteDirSuccessHaveBlobs) { int64_t n_blobs = 257; for (int64_t i = 0; i < n_blobs; ++i) { const auto blob_path = ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(blob_path)); ASSERT_OK(output->Write(std::string_view(std::to_string(i)))); ASSERT_OK(output->Close()); - arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::File); + AssertFileInfo(fs(), blob_path, FileType::File); } - ASSERT_OK(fs_->DeleteDir(directory_path)); + ASSERT_OK(fs()->DeleteDir(directory_path)); for (int64_t i = 0; i < n_blobs; ++i) { const auto blob_path = ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); - arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound); + AssertFileInfo(fs(), blob_path, FileType::NotFound); } } TEST_F(TestAzuriteFileSystem, DeleteDirUri) { auto data = SetUpPreexistingData(); - ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" + data.container_name + "/")); + ASSERT_RAISES(Invalid, fs()->DeleteDir("abfs://" + data.container_name + "/")); } TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessContainer) { @@ -1135,11 +1191,11 @@ TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessContainer) { auto data = SetUpPreexistingData(); HierarchicalPaths paths; CreateHierarchicalData(&paths); - ASSERT_OK(fs_->DeleteDirContents(paths.container)); - arrow::fs::AssertFileInfo(fs_.get(), paths.container, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::NotFound); + ASSERT_OK(fs()->DeleteDirContents(paths.container)); + AssertFileInfo(fs(), paths.container, FileType::Directory); + AssertFileInfo(fs(), paths.directory, FileType::NotFound); for (const auto& sub_path : paths.sub_paths) { - arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound); + AssertFileInfo(fs(), sub_path, FileType::NotFound); } } @@ -1151,11 +1207,11 @@ TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessDirectory) { auto data = SetUpPreexistingData(); HierarchicalPaths paths; CreateHierarchicalData(&paths); - ASSERT_OK(fs_->DeleteDirContents(paths.directory)); + ASSERT_OK(fs()->DeleteDirContents(paths.directory)); // GH-38772: We may change this to FileType::Directory. 
- arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::NotFound); + AssertFileInfo(fs(), paths.directory, FileType::NotFound); for (const auto& sub_path : paths.sub_paths) { - arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound); + AssertFileInfo(fs(), sub_path, FileType::NotFound); } } @@ -1170,52 +1226,52 @@ TEST_F(TestAzuriteFileSystem, DeleteDirContentsFailureNonexistent) { TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationNonexistent) { auto data = SetUpPreexistingData(); const auto destination_path = data.ContainerPath("copy-destionation"); - ASSERT_OK(fs_->CopyFile(data.ObjectPath(), destination_path)); - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(destination_path)); - ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(info)); + ASSERT_OK(fs()->CopyFile(data.ObjectPath(), destination_path)); + ASSERT_OK_AND_ASSIGN(auto info, fs()->GetFileInfo(destination_path)); + ASSERT_OK_AND_ASSIGN(auto stream, fs()->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString()); } TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationSame) { auto data = SetUpPreexistingData(); - ASSERT_OK(fs_->CopyFile(data.ObjectPath(), data.ObjectPath())); - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); - ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(info)); + ASSERT_OK(fs()->CopyFile(data.ObjectPath(), data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(auto info, fs()->GetFileInfo(data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(auto stream, fs()->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString()); } TEST_F(TestAzuriteFileSystem, CopyFileFailureDestinationTrailingSlash) { auto data = SetUpPreexistingData(); - ASSERT_RAISES(IOError, fs_->CopyFile(data.ObjectPath(), - internal::EnsureTrailingSlash(data.ObjectPath()))); + ASSERT_RAISES(IOError, fs()->CopyFile(data.ObjectPath(), internal::EnsureTrailingSlash( + data.ObjectPath()))); } TEST_F(TestAzuriteFileSystem, CopyFileFailureSourceNonexistent) { auto data = SetUpPreexistingData(); const auto destination_path = data.ContainerPath("copy-destionation"); - ASSERT_RAISES(IOError, fs_->CopyFile(data.NotFoundObjectPath(), destination_path)); + ASSERT_RAISES(IOError, fs()->CopyFile(data.NotFoundObjectPath(), destination_path)); } TEST_F(TestAzuriteFileSystem, CopyFileFailureDestinationParentNonexistent) { auto data = SetUpPreexistingData(); const auto destination_path = ConcatAbstractPath(PreexistingData::RandomContainerName(rng_), "copy-destionation"); - ASSERT_RAISES(IOError, fs_->CopyFile(data.ObjectPath(), destination_path)); + ASSERT_RAISES(IOError, fs()->CopyFile(data.ObjectPath(), destination_path)); } TEST_F(TestAzuriteFileSystem, CopyFileUri) { auto data = SetUpPreexistingData(); const auto destination_path = data.ContainerPath("copy-destionation"); - ASSERT_RAISES(Invalid, fs_->CopyFile("abfs://" + data.ObjectPath(), destination_path)); - ASSERT_RAISES(Invalid, fs_->CopyFile(data.ObjectPath(), "abfs://" + destination_path)); + ASSERT_RAISES(Invalid, fs()->CopyFile("abfs://" + data.ObjectPath(), destination_path)); + ASSERT_RAISES(Invalid, fs()->CopyFile(data.ObjectPath(), "abfs://" + destination_path)); } TEST_F(TestAzuriteFileSystem, OpenInputStreamString) { auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, 
fs()->OpenInputStream(data.ObjectPath())); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); EXPECT_EQ(buffer->ToString(), PreexistingData::kLoremIpsum); @@ -1224,7 +1280,7 @@ TEST_F(TestAzuriteFileSystem, OpenInputStreamString) { TEST_F(TestAzuriteFileSystem, OpenInputStreamStringBuffers) { auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, fs()->OpenInputStream(data.ObjectPath())); std::string contents; std::shared_ptr buffer; @@ -1238,10 +1294,10 @@ TEST_F(TestAzuriteFileSystem, OpenInputStreamStringBuffers) { TEST_F(TestAzuriteFileSystem, OpenInputStreamInfo) { auto data = SetUpPreexistingData(); - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(auto info, fs()->GetFileInfo(data.ObjectPath())); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(info)); + ASSERT_OK_AND_ASSIGN(stream, fs()->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); EXPECT_EQ(buffer->ToString(), PreexistingData::kLoremIpsum); @@ -1255,7 +1311,7 @@ TEST_F(TestAzuriteFileSystem, OpenInputStreamEmpty) { .GetBlockBlobClient(path_to_file) .UploadFrom(nullptr, 0); - ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(path)); + ASSERT_OK_AND_ASSIGN(auto stream, fs()->OpenInputStream(path)); std::array buffer{}; std::int64_t size; ASSERT_OK_AND_ASSIGN(size, stream->Read(buffer.size(), buffer.data())); @@ -1264,26 +1320,26 @@ TEST_F(TestAzuriteFileSystem, OpenInputStreamEmpty) { TEST_F(TestAzuriteFileSystem, OpenInputStreamNotFound) { auto data = SetUpPreexistingData(); - ASSERT_RAISES(IOError, fs_->OpenInputStream(data.NotFoundObjectPath())); + ASSERT_RAISES(IOError, fs()->OpenInputStream(data.NotFoundObjectPath())); } TEST_F(TestAzuriteFileSystem, OpenInputStreamInfoInvalid) { auto data = SetUpPreexistingData(); - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.container_name + "/")); - ASSERT_RAISES(IOError, fs_->OpenInputStream(info)); + ASSERT_OK_AND_ASSIGN(auto info, fs()->GetFileInfo(data.container_name + "/")); + ASSERT_RAISES(IOError, fs()->OpenInputStream(info)); - ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(data.NotFoundObjectPath())); - ASSERT_RAISES(IOError, fs_->OpenInputStream(info2)); + ASSERT_OK_AND_ASSIGN(auto info2, fs()->GetFileInfo(data.NotFoundObjectPath())); + ASSERT_RAISES(IOError, fs()->OpenInputStream(info2)); } TEST_F(TestAzuriteFileSystem, OpenInputStreamUri) { auto data = SetUpPreexistingData(); - ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + data.ObjectPath())); + ASSERT_RAISES(Invalid, fs()->OpenInputStream("abfs://" + data.ObjectPath())); } TEST_F(TestAzuriteFileSystem, OpenInputStreamTrailingSlash) { auto data = SetUpPreexistingData(); - ASSERT_RAISES(IOError, fs_->OpenInputStream(data.ObjectPath() + '/')); + ASSERT_RAISES(IOError, fs()->OpenInputStream(data.ObjectPath() + '/')); } namespace { @@ -1324,7 +1380,7 @@ std::shared_ptr NormalizerKeyValueMetadata( TEST_F(TestAzuriteFileSystem, OpenInputStreamReadMetadata) { auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, fs()->OpenInputStream(data.ObjectPath())); std::shared_ptr actual; ASSERT_OK_AND_ASSIGN(actual, stream->ReadMetadata()); @@ -1354,7 +1410,7 @@ TEST_F(TestAzuriteFileSystem, OpenInputStreamReadMetadata) { TEST_F(TestAzuriteFileSystem, OpenInputStreamClosed) { auto data = 
SetUpPreexistingData(); - ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(auto stream, fs()->OpenInputStream(data.ObjectPath())); ASSERT_OK(stream->Close()); std::array buffer{}; ASSERT_RAISES(Invalid, stream->Read(buffer.size(), buffer.data())); @@ -1399,13 +1455,13 @@ TEST_F(TestAzuriteFileSystem, WriteMetadata) { TEST_F(TestAzuriteFileSystem, OpenOutputStreamSmall) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("test-write-object"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(path, {})); const std::string_view expected(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected)); ASSERT_OK(output->Close()); // Verify we can read the object back. - ASSERT_OK_AND_ASSIGN(auto input, fs_->OpenInputStream(path)); + ASSERT_OK_AND_ASSIGN(auto input, fs()->OpenInputStream(path)); std::array inbuf{}; ASSERT_OK_AND_ASSIGN(auto size, input->Read(inbuf.size(), inbuf.data())); @@ -1416,7 +1472,7 @@ TEST_F(TestAzuriteFileSystem, OpenOutputStreamSmall) { TEST_F(TestAzuriteFileSystem, OpenOutputStreamLarge) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("test-write-object"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(path, {})); std::array sizes{257 * 1024, 258 * 1024, 259 * 1024}; std::array buffers{ std::string(sizes[0], 'A'), @@ -1432,7 +1488,7 @@ TEST_F(TestAzuriteFileSystem, OpenOutputStreamLarge) { ASSERT_OK(output->Close()); // Verify we can read the object back. - ASSERT_OK_AND_ASSIGN(auto input, fs_->OpenInputStream(path)); + ASSERT_OK_AND_ASSIGN(auto input, fs()->OpenInputStream(path)); std::string contents; std::shared_ptr buffer; @@ -1448,26 +1504,26 @@ TEST_F(TestAzuriteFileSystem, OpenOutputStreamLarge) { TEST_F(TestAzuriteFileSystem, OpenOutputStreamTruncatesExistingFile) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("test-write-object"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(path, {})); const std::string_view expected0("Existing blob content"); ASSERT_OK(output->Write(expected0)); ASSERT_OK(output->Close()); // Check that the initial content has been written - if not this test is not achieving // what it's meant to. - ASSERT_OK_AND_ASSIGN(auto input, fs_->OpenInputStream(path)); + ASSERT_OK_AND_ASSIGN(auto input, fs()->OpenInputStream(path)); std::array inbuf{}; ASSERT_OK_AND_ASSIGN(auto size, input->Read(inbuf.size(), inbuf.data())); EXPECT_EQ(expected0, std::string_view(inbuf.data(), size)); - ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(path, {})); + ASSERT_OK_AND_ASSIGN(output, fs()->OpenOutputStream(path, {})); const std::string_view expected1(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected1)); ASSERT_OK(output->Close()); // Verify that the initial content has been overwritten. 
- ASSERT_OK_AND_ASSIGN(input, fs_->OpenInputStream(path)); + ASSERT_OK_AND_ASSIGN(input, fs()->OpenInputStream(path)); ASSERT_OK_AND_ASSIGN(size, input->Read(inbuf.size(), inbuf.data())); EXPECT_EQ(expected1, std::string_view(inbuf.data(), size)); } @@ -1475,27 +1531,27 @@ TEST_F(TestAzuriteFileSystem, OpenOutputStreamTruncatesExistingFile) { TEST_F(TestAzuriteFileSystem, OpenAppendStreamDoesNotTruncateExistingFile) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("test-write-object"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(path, {})); const std::string_view expected0("Existing blob content"); ASSERT_OK(output->Write(expected0)); ASSERT_OK(output->Close()); // Check that the initial content has been written - if not this test is not achieving // what it's meant to. - ASSERT_OK_AND_ASSIGN(auto input, fs_->OpenInputStream(path)); + ASSERT_OK_AND_ASSIGN(auto input, fs()->OpenInputStream(path)); std::array inbuf{}; ASSERT_OK_AND_ASSIGN(auto size, input->Read(inbuf.size(), inbuf.data())); EXPECT_EQ(expected0, std::string_view(inbuf.data())); - ASSERT_OK_AND_ASSIGN(output, fs_->OpenAppendStream(path, {})); + ASSERT_OK_AND_ASSIGN(output, fs()->OpenAppendStream(path, {})); const std::string_view expected1(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected1)); ASSERT_OK(output->Close()); // Verify that the initial content has not been overwritten and that the block from // the other client was not committed. - ASSERT_OK_AND_ASSIGN(input, fs_->OpenInputStream(path)); + ASSERT_OK_AND_ASSIGN(input, fs()->OpenInputStream(path)); ASSERT_OK_AND_ASSIGN(size, input->Read(inbuf.size(), inbuf.data())); EXPECT_EQ(std::string(inbuf.data(), size), std::string(expected0) + std::string(expected1)); @@ -1504,7 +1560,7 @@ TEST_F(TestAzuriteFileSystem, OpenAppendStreamDoesNotTruncateExistingFile) { TEST_F(TestAzuriteFileSystem, OpenOutputStreamClosed) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("open-output-stream-closed.txt"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(path, {})); ASSERT_OK(output->Close()); ASSERT_RAISES(Invalid, output->Write(PreexistingData::kLoremIpsum, std::strlen(PreexistingData::kLoremIpsum))); @@ -1515,7 +1571,7 @@ TEST_F(TestAzuriteFileSystem, OpenOutputStreamClosed) { TEST_F(TestAzuriteFileSystem, OpenOutputStreamUri) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("open-output-stream-uri.txt"); - ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + path)); + ASSERT_RAISES(Invalid, fs()->OpenInputStream("abfs://" + path)); } TEST_F(TestAzuriteFileSystem, OpenInputFileMixedReadVsReadAt) { @@ -1534,7 +1590,7 @@ TEST_F(TestAzuriteFileSystem, OpenInputFileMixedReadVsReadAt) { UploadLines(lines, path, kLineCount * kLineWidth); std::shared_ptr file; - ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(path)); + ASSERT_OK_AND_ASSIGN(file, fs()->OpenInputFile(path)); for (int i = 0; i != 32; ++i) { SCOPED_TRACE("Iteration " + std::to_string(i)); // Verify sequential reads work as expected. 
@@ -1582,7 +1638,7 @@ TEST_F(TestAzuriteFileSystem, OpenInputFileRandomSeek) { UploadLines(lines, path, kLineCount * kLineWidth); std::shared_ptr file; - ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(path)); + ASSERT_OK_AND_ASSIGN(file, fs()->OpenInputFile(path)); for (int i = 0; i != 32; ++i) { SCOPED_TRACE("Iteration " + std::to_string(i)); // Verify sequential reads work as expected. @@ -1607,16 +1663,16 @@ TEST_F(TestAzuriteFileSystem, OpenInputFileIoContext) { contents.length()); std::shared_ptr file; - ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(path)); - EXPECT_EQ(fs_->io_context().external_id(), file->io_context().external_id()); + ASSERT_OK_AND_ASSIGN(file, fs()->OpenInputFile(path)); + EXPECT_EQ(fs()->io_context().external_id(), file->io_context().external_id()); } TEST_F(TestAzuriteFileSystem, OpenInputFileInfo) { auto data = SetUpPreexistingData(); - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(auto info, fs()->GetFileInfo(data.ObjectPath())); std::shared_ptr file; - ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(info)); + ASSERT_OK_AND_ASSIGN(file, fs()->OpenInputFile(info)); std::array buffer{}; std::int64_t size; @@ -1629,21 +1685,21 @@ TEST_F(TestAzuriteFileSystem, OpenInputFileInfo) { TEST_F(TestAzuriteFileSystem, OpenInputFileNotFound) { auto data = SetUpPreexistingData(); - ASSERT_RAISES(IOError, fs_->OpenInputFile(data.NotFoundObjectPath())); + ASSERT_RAISES(IOError, fs()->OpenInputFile(data.NotFoundObjectPath())); } TEST_F(TestAzuriteFileSystem, OpenInputFileInfoInvalid) { auto data = SetUpPreexistingData(); - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.container_name)); - ASSERT_RAISES(IOError, fs_->OpenInputFile(info)); + ASSERT_OK_AND_ASSIGN(auto info, fs()->GetFileInfo(data.container_name)); + ASSERT_RAISES(IOError, fs()->OpenInputFile(info)); - ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(data.NotFoundObjectPath())); - ASSERT_RAISES(IOError, fs_->OpenInputFile(info2)); + ASSERT_OK_AND_ASSIGN(auto info2, fs()->GetFileInfo(data.NotFoundObjectPath())); + ASSERT_RAISES(IOError, fs()->OpenInputFile(info2)); } TEST_F(TestAzuriteFileSystem, OpenInputFileClosed) { auto data = SetUpPreexistingData(); - ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputFile(data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(auto stream, fs()->OpenInputFile(data.ObjectPath())); ASSERT_OK(stream->Close()); std::array buffer{}; ASSERT_RAISES(Invalid, stream->Tell()); @@ -1654,6 +1710,5 @@ TEST_F(TestAzuriteFileSystem, OpenInputFileClosed) { ASSERT_RAISES(Invalid, stream->Seek(2)); } -} // namespace } // namespace fs } // namespace arrow From b70ad0b8801d9ca0634c1937df1fc02c1609548e Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Thu, 21 Dec 2023 22:00:25 +0100 Subject: [PATCH 089/570] GH-31303: [Python] Remove the legacy ParquetDataset custom python-based implementation (#39112) ### Rationale for this change Legacy ParquetDataset has been deprecated for a while now, see https://github.com/apache/arrow/issues/31529. This PR is removing the legacy implementation from the code. ### What changes are included in this PR? The PR is removing: - `ParquetDatasetPiece ` - `ParquetManifest` - `_ParquetDatasetMetadata ` - `ParquetDataset` The PR is renaming `_ParquetDatasetV2` to `ParquetDataset` which was removed. It is also updating the docstrings. The PR is updating: - `read_table` - `write_to_dataset` The PR is updating all the tests to not use `use_legacy_dataset` keyword or legacy parametrisation. ### Are these changes tested? Yes. 
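For reference, a minimal sketch of the usage that remains after this removal, mirroring the updated docstring example in this patch (the `dataset_v2` path and column names are illustrative placeholders, not part of the test suite):

```python
import pyarrow as pa
import pyarrow.parquet as pq

# Illustrative table; contents follow the docstring example updated in this PR.
table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021],
                  'n_legs': [2, 2, 4, 4, 5, 100],
                  'animal': ["Flamingo", "Parrot", "Dog", "Horse",
                             "Brittle stars", "Centipede"]})

# write_to_dataset no longer takes use_legacy_dataset; it always goes
# through the pyarrow.dataset implementation.
pq.write_to_dataset(table, root_path='dataset_v2', partition_cols=['year'])

# ParquetDataset is now the renamed _ParquetDatasetV2: filters may reference
# any column (using row-group statistics), not only partition keys.
dataset = pq.ParquetDataset('dataset_v2/', filters=[('n_legs', '=', 4)])

# Partition columns must be listed explicitly in `columns` to appear in the
# result when reading a subset of columns.
print(dataset.read(columns=['n_legs', 'year']))
```

This is only a sketch of the post-removal API surface; the authoritative behaviour is what the updated tests and docstrings below exercise.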
### Are there any user-facing changes? Deprecated code is removed. * Closes: #31303 --- docs/source/python/parquet.rst | 38 +- python/benchmarks/parquet.py | 29 - python/pyarrow/parquet/core.py | 1882 +++-------------- python/pyarrow/tests/parquet/__init__.py | 3 - python/pyarrow/tests/parquet/common.py | 39 +- python/pyarrow/tests/parquet/test_basic.py | 278 +-- .../parquet/test_compliant_nested_type.py | 19 +- .../pyarrow/tests/parquet/test_data_types.py | 94 +- python/pyarrow/tests/parquet/test_dataset.py | 926 ++------ python/pyarrow/tests/parquet/test_datetime.py | 14 +- python/pyarrow/tests/parquet/test_pandas.py | 192 +- .../tests/parquet/test_parquet_file.py | 25 +- .../tests/parquet/test_parquet_writer.py | 27 +- python/pyarrow/tests/test_dataset.py | 68 +- python/pyarrow/tests/test_hdfs.py | 25 +- 15 files changed, 630 insertions(+), 3029 deletions(-) diff --git a/docs/source/python/parquet.rst b/docs/source/python/parquet.rst index 85a9674a689ca..d4717897660b6 100644 --- a/docs/source/python/parquet.rst +++ b/docs/source/python/parquet.rst @@ -511,36 +511,20 @@ from a remote filesystem into a pandas dataframe you may need to run ``sort_index`` to maintain row ordering (as long as the ``preserve_index`` option was enabled on write). -.. note:: - - The ParquetDataset is being reimplemented based on the new generic Dataset - API (see the :ref:`dataset` docs for an overview). This is not yet the - default, but can already be enabled by passing the ``use_legacy_dataset=False`` - keyword to :class:`ParquetDataset` or :func:`read_table`:: - - pq.ParquetDataset('dataset_name/', use_legacy_dataset=False) - - Enabling this gives the following new features: - - - Filtering on all columns (using row group statistics) instead of only on - the partition keys. - - More fine-grained partitioning: support for a directory partitioning scheme - in addition to the Hive-like partitioning (e.g. "/2019/11/15/" instead of - "/year=2019/month=11/day=15/"), and the ability to specify a schema for - the partition keys. - - General performance improvement and bug fixes. +Other features: - It also has the following changes in behaviour: +- Filtering on all columns (using row group statistics) instead of only on + the partition keys. +- Fine-grained partitioning: support for a directory partitioning scheme + in addition to the Hive-like partitioning (e.g. "/2019/11/15/" instead of + "/year=2019/month=11/day=15/"), and the ability to specify a schema for + the partition keys. - - The partition keys need to be explicitly included in the ``columns`` - keyword when you want to include them in the result while reading a - subset of the columns +Note: - This new implementation is already enabled in ``read_table``, and in the - future, this will be turned on by default for ``ParquetDataset``. The new - implementation does not yet cover all existing ParquetDataset features (e.g. - specifying the ``metadata``, or the ``pieces`` property API). Feedback is - very welcome. 
+- The partition keys need to be explicitly included in the ``columns`` + keyword when you want to include them in the result while reading a + subset of the columns Using with Spark diff --git a/python/benchmarks/parquet.py b/python/benchmarks/parquet.py index 3aeca425bc8f0..e459ea2c369b4 100644 --- a/python/benchmarks/parquet.py +++ b/python/benchmarks/parquet.py @@ -29,35 +29,6 @@ pq = None -class ParquetManifestCreation(object): - """Benchmark creating a parquet manifest.""" - - size = 10 ** 6 - tmpdir = None - - param_names = ('num_partitions', 'num_threads') - params = [(10, 100, 1000), (1, 8)] - - def setup(self, num_partitions, num_threads): - if pq is None: - raise NotImplementedError("Parquet support not enabled") - - self.tmpdir = tempfile.mkdtemp('benchmark_parquet') - rnd = np.random.RandomState(42) - num1 = rnd.randint(0, num_partitions, size=self.size) - num2 = rnd.randint(0, 1000, size=self.size) - output_df = pd.DataFrame({'num1': num1, 'num2': num2}) - output_table = pa.Table.from_pandas(output_df) - pq.write_to_dataset(output_table, self.tmpdir, ['num1']) - - def teardown(self, num_partitions, num_threads): - if self.tmpdir is not None: - shutil.rmtree(self.tmpdir) - - def time_manifest_creation(self, num_partitions, num_threads): - pq.ParquetManifest(self.tmpdir, metadata_nthreads=num_threads) - - class ParquetWriteBinary(object): def setup(self): diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index 852b339211b0d..98a4b2a1138c7 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -17,22 +17,17 @@ from collections import defaultdict -from concurrent import futures from contextlib import nullcontext -from functools import partial, reduce +from functools import reduce import inspect import json -from collections.abc import Collection -import numpy as np import os import re import operator -import urllib.parse import warnings import pyarrow as pa -import pyarrow.lib as lib try: import pyarrow._parquet as _parquet @@ -55,28 +50,6 @@ from pyarrow import filesystem as legacyfs from pyarrow.util import guid, _is_path_like, _stringify_path, _deprecate_api -_URI_STRIP_SCHEMES = ('hdfs',) - - -def _parse_uri(path): - path = _stringify_path(path) - parsed_uri = urllib.parse.urlparse(path) - if parsed_uri.scheme in _URI_STRIP_SCHEMES: - return parsed_uri.path - else: - # ARROW-4073: On Windows returning the path with the scheme - # stripped removes the drive letter, if any - return path - - -def _get_filesystem_and_path(passed_filesystem, path): - if passed_filesystem is None: - return legacyfs.resolve_filesystem_and_path(path, passed_filesystem) - else: - passed_filesystem = legacyfs._ensure_filesystem(passed_filesystem) - parsed_path = _parse_uri(path) - return passed_filesystem, parsed_path - def _check_contains_null(val): if isinstance(val, bytes): @@ -1148,516 +1121,15 @@ def _get_pandas_index_columns(keyvalues): ['index_columns']) -# ---------------------------------------------------------------------- -# Metadata container providing instructions about reading a single Parquet -# file, possibly part of a partitioned dataset - - -class ParquetDatasetPiece: - """ - DEPRECATED: A single chunk of a potentially larger Parquet dataset to read. - - The arguments will indicate to read either a single row group or all row - groups, and whether to add partition keys to the resulting pyarrow.Table. - - .. 
deprecated:: 5.0 - Directly constructing a ``ParquetDatasetPiece`` is deprecated, as well - as accessing the pieces of a ``ParquetDataset`` object. Specify - ``use_legacy_dataset=False`` when constructing the ``ParquetDataset`` - and use the ``ParquetDataset.fragments`` attribute instead. - - Parameters - ---------- - path : str or pathlib.Path - Path to file in the file system where this piece is located. - open_file_func : callable - Function to use for obtaining file handle to dataset piece. - file_options : dict - Options - row_group : int, default None - Row group to load. By default, reads all row groups. - partition_keys : list of tuples - Two-element tuples of ``(column name, ordinal index)``. - """ - - def __init__(self, path, open_file_func=partial(open, mode='rb'), - file_options=None, row_group=None, partition_keys=None): - warnings.warn( - "ParquetDatasetPiece is deprecated as of pyarrow 5.0.0 and will " - "be removed in a future version.", - FutureWarning, stacklevel=2) - self._init( - path, open_file_func, file_options, row_group, partition_keys) - - @staticmethod - def _create(path, open_file_func=partial(open, mode='rb'), - file_options=None, row_group=None, partition_keys=None): - self = ParquetDatasetPiece.__new__(ParquetDatasetPiece) - self._init( - path, open_file_func, file_options, row_group, partition_keys) - return self - - def _init(self, path, open_file_func, file_options, row_group, - partition_keys): - self.path = _stringify_path(path) - self.open_file_func = open_file_func - self.row_group = row_group - self.partition_keys = partition_keys or [] - self.file_options = file_options or {} - - def __eq__(self, other): - if not isinstance(other, ParquetDatasetPiece): - return False - return (self.path == other.path and - self.row_group == other.row_group and - self.partition_keys == other.partition_keys) - - def __repr__(self): - return ('{}({!r}, row_group={!r}, partition_keys={!r})' - .format(type(self).__name__, self.path, - self.row_group, - self.partition_keys)) - - def __str__(self): - result = '' - - if len(self.partition_keys) > 0: - partition_str = ', '.join('{}={}'.format(name, index) - for name, index in self.partition_keys) - result += 'partition[{}] '.format(partition_str) - - result += self.path - - if self.row_group is not None: - result += ' | row_group={}'.format(self.row_group) - - return result - - def get_metadata(self): - """ - Return the file's metadata. - - Returns - ------- - metadata : FileMetaData - The file's metadata - """ - with self.open() as parquet: - return parquet.metadata - - def open(self): - """ - Return instance of ParquetFile. - """ - reader = self.open_file_func(self.path) - if not isinstance(reader, ParquetFile): - reader = ParquetFile(reader, **self.file_options) - - # ensure reader knows it's responsible for closing source - # since we opened the source here internally. - reader._close_source = True - return reader - - def read(self, columns=None, use_threads=True, partitions=None, - file=None, use_pandas_metadata=False): - """ - Read this piece as a pyarrow.Table. - - Parameters - ---------- - columns : list of column names, default None - use_threads : bool, default True - Perform multi-threaded column reads. - partitions : ParquetPartitions, default None - file : file-like object - Passed to ParquetFile. - use_pandas_metadata : bool - If pandas metadata should be used or not. - - Returns - ------- - table : pyarrow.Table - The piece as a pyarrow.Table. 
- """ - if self.open_file_func is not None: - reader = self.open() - elif file is not None: - reader = ParquetFile(file, **self.file_options) - else: - # try to read the local path - reader = ParquetFile(self.path, **self.file_options) - - options = dict(columns=columns, - use_threads=use_threads, - use_pandas_metadata=use_pandas_metadata) - - if self.row_group is not None: - table = reader.read_row_group(self.row_group, **options) - else: - table = reader.read(**options) - - if len(self.partition_keys) > 0: - if partitions is None: - raise ValueError('Must pass partition sets') - - # Here, the index is the categorical code of the partition where - # this piece is located. Suppose we had - # - # /foo=a/0.parq - # /foo=b/0.parq - # /foo=c/0.parq - # - # Then we assign a=0, b=1, c=2. And the resulting Table pieces will - # have a DictionaryArray column named foo having the constant index - # value as indicated. The distinct categories of the partition have - # been computed in the ParquetManifest - for i, (name, index) in enumerate(self.partition_keys): - # The partition code is the same for all values in this piece - indices = np.full(len(table), index, dtype='i4') - - # This is set of all partition values, computed as part of the - # manifest, so ['a', 'b', 'c'] as in our example above. - dictionary = partitions.levels[i].dictionary - - arr = pa.DictionaryArray.from_arrays(indices, dictionary) - table = table.append_column(name, arr) - - # To ParquetFile the source looked like it was already open, so won't - # actually close it without overriding. - reader.close(force=True) - return table - - -class PartitionSet: - """ - A data structure for cataloguing the observed Parquet partitions at a - particular level. So if we have - - /foo=a/bar=0 - /foo=a/bar=1 - /foo=a/bar=2 - /foo=b/bar=0 - /foo=b/bar=1 - /foo=b/bar=2 - - Then we have two partition sets, one for foo, another for bar. As we visit - levels of the partition hierarchy, a PartitionSet tracks the distinct - values and assigns categorical codes to use when reading the pieces - - Parameters - ---------- - name : str - Name of the partition set. Under which key to collect all values. - keys : list - All possible values that have been collected for that partition set. - """ - - def __init__(self, name, keys=None): - self.name = name - self.keys = keys or [] - self.key_indices = {k: i for i, k in enumerate(self.keys)} - self._dictionary = None - - def get_index(self, key): - """ - Get the index of the partition value if it is known, otherwise assign - one - - Parameters - ---------- - key : str or int - The value for which we want to known the index. 
- """ - if key in self.key_indices: - return self.key_indices[key] - else: - index = len(self.key_indices) - self.keys.append(key) - self.key_indices[key] = index - return index - - @property - def dictionary(self): - if self._dictionary is not None: - return self._dictionary - - if len(self.keys) == 0: - raise ValueError('No known partition keys') - - # Only integer and string partition types are supported right now - try: - integer_keys = [int(x) for x in self.keys] - dictionary = lib.array(integer_keys) - except ValueError: - dictionary = lib.array(self.keys) - - self._dictionary = dictionary - return dictionary - - @property - def is_sorted(self): - return list(self.keys) == sorted(self.keys) - - -class ParquetPartitions: - - def __init__(self): - self.levels = [] - self.partition_names = set() - - def __len__(self): - return len(self.levels) - - def __getitem__(self, i): - return self.levels[i] - - def equals(self, other): - if not isinstance(other, ParquetPartitions): - raise TypeError('`other` must be an instance of ParquetPartitions') - - return (self.levels == other.levels and - self.partition_names == other.partition_names) - - def __eq__(self, other): - try: - return self.equals(other) - except TypeError: - return NotImplemented - - def get_index(self, level, name, key): - """ - Record a partition value at a particular level, returning the distinct - code for that value at that level. - - Examples - -------- - - partitions.get_index(1, 'foo', 'a') returns 0 - partitions.get_index(1, 'foo', 'b') returns 1 - partitions.get_index(1, 'foo', 'c') returns 2 - partitions.get_index(1, 'foo', 'a') returns 0 - - Parameters - ---------- - level : int - The nesting level of the partition we are observing - name : str - The partition name - key : str or int - The partition value - """ - if level == len(self.levels): - if name in self.partition_names: - raise ValueError('{} was the name of the partition in ' - 'another level'.format(name)) - - part_set = PartitionSet(name) - self.levels.append(part_set) - self.partition_names.add(name) - - return self.levels[level].get_index(key) - - def filter_accepts_partition(self, part_key, filter, level): - p_column, p_value_index = part_key - f_column, op, f_value = filter - if p_column != f_column: - return True - - f_type = type(f_value) - - if op in {'in', 'not in'}: - if not isinstance(f_value, Collection): - raise TypeError( - "'%s' object is not a collection", f_type.__name__) - if not f_value: - raise ValueError("Cannot use empty collection as filter value") - if len({type(item) for item in f_value}) != 1: - raise ValueError("All elements of the collection '%s' must be" - " of same type", f_value) - f_type = type(next(iter(f_value))) - - elif not isinstance(f_value, str) and isinstance(f_value, Collection): - raise ValueError( - "Op '%s' not supported with a collection value", op) - - p_value = f_type(self.levels[level] - .dictionary[p_value_index].as_py()) - - if op == "=" or op == "==": - return p_value == f_value - elif op == "!=": - return p_value != f_value - elif op == '<': - return p_value < f_value - elif op == '>': - return p_value > f_value - elif op == '<=': - return p_value <= f_value - elif op == '>=': - return p_value >= f_value - elif op == 'in': - return p_value in f_value - elif op == 'not in': - return p_value not in f_value - else: - raise ValueError("'%s' is not a valid operator in predicates.", - filter[1]) - - -class ParquetManifest: - - def __init__(self, dirpath, open_file_func=None, filesystem=None, - pathsep='/', 
partition_scheme='hive', metadata_nthreads=1): - filesystem, dirpath = _get_filesystem_and_path(filesystem, dirpath) - self.filesystem = filesystem - self.open_file_func = open_file_func - self.pathsep = pathsep - self.dirpath = _stringify_path(dirpath) - self.partition_scheme = partition_scheme - self.partitions = ParquetPartitions() - self.pieces = [] - self._metadata_nthreads = metadata_nthreads - self._thread_pool = futures.ThreadPoolExecutor( - max_workers=metadata_nthreads) - - self.common_metadata_path = None - self.metadata_path = None - - self._visit_level(0, self.dirpath, []) - - # Due to concurrency, pieces will potentially by out of order if the - # dataset is partitioned so we sort them to yield stable results - self.pieces.sort(key=lambda piece: piece.path) - - if self.common_metadata_path is None: - # _common_metadata is a subset of _metadata - self.common_metadata_path = self.metadata_path - - self._thread_pool.shutdown() - - def _visit_level(self, level, base_path, part_keys): - fs = self.filesystem - - _, directories, files = next(fs.walk(base_path)) - - filtered_files = [] - for path in files: - full_path = self.pathsep.join((base_path, path)) - if path.endswith('_common_metadata'): - self.common_metadata_path = full_path - elif path.endswith('_metadata'): - self.metadata_path = full_path - elif self._should_silently_exclude(path): - continue - else: - filtered_files.append(full_path) - - # ARROW-1079: Filter out "private" directories starting with underscore - filtered_directories = [self.pathsep.join((base_path, x)) - for x in directories - if not _is_private_directory(x)] - - filtered_files.sort() - filtered_directories.sort() - - if len(filtered_files) > 0 and len(filtered_directories) > 0: - raise ValueError('Found files in an intermediate ' - 'directory: {}'.format(base_path)) - elif len(filtered_directories) > 0: - self._visit_directories(level, filtered_directories, part_keys) - else: - self._push_pieces(filtered_files, part_keys) - - def _should_silently_exclude(self, file_name): - return (file_name.endswith('.crc') or # Checksums - file_name.endswith('_$folder$') or # HDFS directories in S3 - file_name.startswith('.') or # Hidden files starting with . - file_name.startswith('_') or # Hidden files starting with _ - file_name in EXCLUDED_PARQUET_PATHS) - - def _visit_directories(self, level, directories, part_keys): - futures_list = [] - for path in directories: - head, tail = _path_split(path, self.pathsep) - name, key = _parse_hive_partition(tail) - - index = self.partitions.get_index(level, name, key) - dir_part_keys = part_keys + [(name, index)] - # If you have less threads than levels, the wait call will block - # indefinitely due to multiple waits within a thread. 
- if level < self._metadata_nthreads: - future = self._thread_pool.submit(self._visit_level, - level + 1, - path, - dir_part_keys) - futures_list.append(future) - else: - self._visit_level(level + 1, path, dir_part_keys) - if futures_list: - futures.wait(futures_list) - - def _parse_partition(self, dirname): - if self.partition_scheme == 'hive': - return _parse_hive_partition(dirname) - else: - raise NotImplementedError('partition schema: {}' - .format(self.partition_scheme)) - - def _push_pieces(self, files, part_keys): - self.pieces.extend([ - ParquetDatasetPiece._create(path, partition_keys=part_keys, - open_file_func=self.open_file_func) - for path in files - ]) - - -def _parse_hive_partition(value): - if '=' not in value: - raise ValueError('Directory name did not appear to be a ' - 'partition: {}'.format(value)) - return value.split('=', 1) - - -def _is_private_directory(x): - _, tail = os.path.split(x) - return (tail.startswith('_') or tail.startswith('.')) and '=' not in tail - - -def _path_split(path, sep): - i = path.rfind(sep) + 1 - head, tail = path[:i], path[i:] - head = head.rstrip(sep) - return head, tail - - EXCLUDED_PARQUET_PATHS = {'_SUCCESS'} -class _ParquetDatasetMetadata: - __slots__ = ('fs', 'memory_map', 'read_dictionary', 'common_metadata', - 'buffer_size') - - -def _open_dataset_file(dataset, path, meta=None): - if (dataset.fs is not None and - not isinstance(dataset.fs, legacyfs.LocalFileSystem)): - path = dataset.fs.open(path, mode='rb') - return ParquetFile( - path, - metadata=meta, - memory_map=dataset.memory_map, - read_dictionary=dataset.read_dictionary, - common_metadata=dataset.common_metadata, - buffer_size=dataset.buffer_size +def _is_local_file_system(fs): + return isinstance(fs, LocalFileSystem) or isinstance( + fs, legacyfs.LocalFileSystem ) -_DEPR_MSG = ( - "'{}' attribute is deprecated as of pyarrow 5.0.0 and will be removed " - "in a future version.{}" -) - - _read_docstring_common = """\ read_dictionary : list, default None List of names or column paths (for nested types) to read directly @@ -1680,6 +1152,7 @@ def _open_dataset_file(dataset, path, meta=None): you need to specify the field names or a full schema. See the ``pyarrow.dataset.partitioning()`` function for more details.""" + _parquet_dataset_example = """\ Generate an example PyArrow Table and write it to a partitioned dataset: @@ -1688,15 +1161,13 @@ def _open_dataset_file(dataset, path, meta=None): ... 'n_legs': [2, 2, 4, 4, 5, 100], ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq ->>> pq.write_to_dataset(table, root_path='dataset_name', -... partition_cols=['year'], -... use_legacy_dataset=False) +>>> pq.write_to_dataset(table, root_path='dataset_v2', +... partition_cols=['year']) create a ParquetDataset object from the dataset source: ->>> dataset = pq.ParquetDataset('dataset_name/', use_legacy_dataset=False) +>>> dataset = pq.ParquetDataset('dataset_v2/') and read the data: @@ -1711,7 +1182,7 @@ def _open_dataset_file(dataset, path, meta=None): create a ParquetDataset object with filter: ->>> dataset = pq.ParquetDataset('dataset_name/', use_legacy_dataset=False, +>>> dataset = pq.ParquetDataset('dataset_v2/', ... 
filters=[('n_legs','=',4)]) >>> dataset.read().to_pandas() n_legs animal year @@ -1721,7 +1192,6 @@ def _open_dataset_file(dataset, path, meta=None): class ParquetDataset: - __doc__ = """ Encapsulates details of reading a complete Parquet dataset possibly consisting of multiple files and partitions in subdirectories. @@ -1735,39 +1205,26 @@ class ParquetDataset: Path will try to be found in the local on-disk filesystem otherwise it will be parsed as an URI to determine the filesystem. schema : pyarrow.parquet.Schema - Use schema obtained elsewhere to validate file schemas. Alternative to - metadata parameter. -metadata : pyarrow.parquet.FileMetaData - Use metadata obtained elsewhere to validate file schemas. -split_row_groups : bool, default False - Divide files into pieces for each row group in the file. -validate_schema : bool, default True - Check that individual file schemas are all the same / compatible. + Optionally provide the Schema for the Dataset, in which case it will + not be inferred from the source. filters : pyarrow.compute.Expression or List[Tuple] or List[List[Tuple]], default None Rows which do not match the filter predicate will be removed from scanned data. Partition keys embedded in a nested directory structure will be exploited to avoid loading files at all if they contain no matching rows. - If `use_legacy_dataset` is True, filters can only reference partition - keys and only a hive-style directory structure is supported. When - setting `use_legacy_dataset` to False, also within-file level filtering - and different partitioning schemes are supported. + Within-file level filtering and different partitioning schemes are supported. {1} -metadata_nthreads : int, default 1 - How many threads to allow the thread pool which is used to read the - dataset metadata. Increasing this is helpful to read partitioned - datasets. {0} -use_legacy_dataset : bool, default False - Set to False to enable the new code path (using the - new Arrow Dataset API). Among other things, this allows to pass - `filters` for all columns and not only the partition keys, enables - different partitioning schemes, etc. +ignore_prefixes : list, optional + Files matching any of these prefixes will be ignored by the + discovery process. + This is matched to the basename of a path. + By default this is ['.', '_']. + Note that discovery happens only if a directory is passed as source. pre_buffer : bool, default True Coalesce and issue file reads in parallel to improve performance on high-latency filesystems (e.g. S3, GCS). If True, Arrow will use a - background I/O thread pool. This option is only supported for - use_legacy_dataset=False. If using a filesystem layer that itself + background I/O thread pool. If using a filesystem layer that itself performs readahead (e.g. fsspec's S3FS), disable readahead for best results. Set to False if you want to prioritize minimal memory usage over maximum speed. @@ -1775,6 +1232,10 @@ class ParquetDataset: Cast timestamps that are stored in INT96 format to a particular resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' and therefore INT96 timestamps will be inferred as timestamps in nanoseconds. +decryption_properties : FileDecryptionProperties or None + File-level decryption properties. + The decryption properties can be created using + ``CryptoFactory.file_decryption_properties()``. thrift_string_size_limit : int, default None If not None, override the maximum total string size allocated when decoding Thrift structures. 
The default limit should be @@ -1785,739 +1246,95 @@ class ParquetDataset: sufficient for most Parquet files. page_checksum_verification : bool, default False If True, verify the page checksum for each page read from the file. +use_legacy_dataset : bool, optional + Deprecated and has no effect from PyArrow version 15.0.0. Examples -------- {2} """.format(_read_docstring_common, _DNF_filter_doc, _parquet_dataset_example) - def __new__(cls, path_or_paths=None, filesystem=None, schema=None, - metadata=None, split_row_groups=False, validate_schema=True, - filters=None, metadata_nthreads=None, read_dictionary=None, - memory_map=False, buffer_size=0, partitioning="hive", - use_legacy_dataset=None, pre_buffer=True, - coerce_int96_timestamp_unit=None, - thrift_string_size_limit=None, - thrift_container_size_limit=None, - page_checksum_verification=False): - - extra_msg = "" - if use_legacy_dataset is None: - # if an old filesystem is passed -> still use to old implementation - if isinstance(filesystem, legacyfs.FileSystem): - use_legacy_dataset = True - extra_msg = ( - " The legacy behaviour was still chosen because a " - "deprecated 'pyarrow.filesystem' filesystem was specified " - "(use the filesystems from pyarrow.fs instead)." - ) - # otherwise the default is already False - else: - use_legacy_dataset = False - - if not use_legacy_dataset: - return _ParquetDatasetV2( - path_or_paths, filesystem=filesystem, - filters=filters, - partitioning=partitioning, - read_dictionary=read_dictionary, - memory_map=memory_map, - buffer_size=buffer_size, - pre_buffer=pre_buffer, - coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, - # unsupported keywords - schema=schema, metadata=metadata, - split_row_groups=split_row_groups, - validate_schema=validate_schema, - metadata_nthreads=metadata_nthreads, - thrift_string_size_limit=thrift_string_size_limit, - thrift_container_size_limit=thrift_container_size_limit, - page_checksum_verification=page_checksum_verification, - ) - warnings.warn( - "Passing 'use_legacy_dataset=True' to get the legacy behaviour is " - "deprecated as of pyarrow 11.0.0, and the legacy implementation " - "will be removed in a future version." 
+ extra_msg, - FutureWarning, stacklevel=2) - self = object.__new__(cls) - return self - - def __init__(self, path_or_paths, filesystem=None, schema=None, - metadata=None, split_row_groups=False, validate_schema=True, - filters=None, metadata_nthreads=None, read_dictionary=None, - memory_map=False, buffer_size=0, partitioning="hive", - use_legacy_dataset=None, pre_buffer=True, + def __init__(self, path_or_paths, filesystem=None, schema=None, *, filters=None, + read_dictionary=None, memory_map=False, buffer_size=None, + partitioning="hive", ignore_prefixes=None, pre_buffer=True, coerce_int96_timestamp_unit=None, - thrift_string_size_limit=None, + decryption_properties=None, thrift_string_size_limit=None, thrift_container_size_limit=None, - page_checksum_verification=False): - if partitioning != "hive": - raise ValueError( - 'Only "hive" for hive-like partitioning is supported when ' - 'using use_legacy_dataset=True') - if metadata_nthreads is not None: - warnings.warn( - "Specifying the 'metadata_nthreads' argument is deprecated as " - "of pyarrow 8.0.0, and the argument will be removed in a " - "future version", - FutureWarning, stacklevel=2, - ) - else: - metadata_nthreads = 1 - - self._ds_metadata = _ParquetDatasetMetadata() - a_path = path_or_paths - if isinstance(a_path, list): - a_path = a_path[0] - - self._ds_metadata.fs, _ = _get_filesystem_and_path(filesystem, a_path) - if isinstance(path_or_paths, list): - self.paths = [_parse_uri(path) for path in path_or_paths] - else: - self.paths = _parse_uri(path_or_paths) - - self._ds_metadata.read_dictionary = read_dictionary - self._ds_metadata.memory_map = memory_map - self._ds_metadata.buffer_size = buffer_size - - (self._pieces, - self._partitions, - self._common_metadata_path, - self._metadata_path) = _make_manifest( - path_or_paths, self._fs, metadata_nthreads=metadata_nthreads, - open_file_func=partial(_open_dataset_file, self._ds_metadata) - ) - - if self._common_metadata_path is not None: - with self._fs.open(self._common_metadata_path) as f: - self._ds_metadata.common_metadata = read_metadata( - f, - memory_map=memory_map - ) - else: - self._ds_metadata.common_metadata = None + page_checksum_verification=False, + use_legacy_dataset=None): - if metadata is not None: + if use_legacy_dataset is not None: warnings.warn( - "Specifying the 'metadata' argument with 'use_legacy_dataset=" - "True' is deprecated as of pyarrow 8.0.0.", + "Passing 'use_legacy_dataset' is deprecated as of pyarrow 15.0.0 " + "and will be removed in a future version.", FutureWarning, stacklevel=2) - if metadata is None and self._metadata_path is not None: - with self._fs.open(self._metadata_path) as f: - self._metadata = read_metadata(f, memory_map=memory_map) - else: - self._metadata = metadata - - if schema is not None: - warnings.warn( - "Specifying the 'schema' argument with 'use_legacy_dataset=" - "True' is deprecated as of pyarrow 8.0.0. 
You can still " - "specify it in combination with 'use_legacy_dataset=False', " - "but in that case you need to specify a pyarrow.Schema " - "instead of a ParquetSchema.", - FutureWarning, stacklevel=2) - self._schema = schema + import pyarrow.dataset as ds - self.split_row_groups = split_row_groups + # map format arguments + read_options = { + "pre_buffer": pre_buffer, + "coerce_int96_timestamp_unit": coerce_int96_timestamp_unit, + "thrift_string_size_limit": thrift_string_size_limit, + "thrift_container_size_limit": thrift_container_size_limit, + "page_checksum_verification": page_checksum_verification, + } + if buffer_size: + read_options.update(use_buffered_stream=True, + buffer_size=buffer_size) + if read_dictionary is not None: + read_options.update(dictionary_columns=read_dictionary) - if split_row_groups: - raise NotImplementedError("split_row_groups not yet implemented") + if decryption_properties is not None: + read_options.update(decryption_properties=decryption_properties) + self._filter_expression = None if filters is not None: - if hasattr(filters, "cast"): - raise TypeError( - "Expressions as filter not supported for legacy dataset") - filters = _check_filters(filters) - self._filter(filters) - - if validate_schema: - self.validate_schemas() - - def __getnewargs_ex__(self): - # when creating a new instance while unpickling, force to use the - # legacy code path to create a ParquetDataset instance - # instead of a _ParquetDatasetV2 instance - return ((), dict(use_legacy_dataset=True)) - - def equals(self, other): - if not isinstance(other, ParquetDataset): - raise TypeError('`other` must be an instance of ParquetDataset') + self._filter_expression = filters_to_expression(filters) - if self._fs.__class__ != other._fs.__class__: - return False - for prop in ('paths', '_pieces', '_partitions', - '_common_metadata_path', '_metadata_path', - '_common_metadata', '_metadata', '_schema', - 'split_row_groups'): - if getattr(self, prop) != getattr(other, prop): - return False - for prop in ('memory_map', 'buffer_size'): - if ( - getattr(self._ds_metadata, prop) != - getattr(other._ds_metadata, prop) - ): - return False - - return True + # map old filesystems to new one + if filesystem is not None: + filesystem = _ensure_filesystem( + filesystem, use_mmap=memory_map) + elif filesystem is None and memory_map: + # if memory_map is specified, assume local file system (string + # path can in principle be URI for any filesystem) + filesystem = LocalFileSystem(use_mmap=memory_map) - def __eq__(self, other): - try: - return self.equals(other) - except TypeError: - return NotImplemented + # This needs to be checked after _ensure_filesystem, because that + # handles the case of an fsspec LocalFileSystem + if ( + hasattr(path_or_paths, "__fspath__") and + filesystem is not None and + not _is_local_file_system(filesystem) + ): + raise TypeError( + "Path-like objects with __fspath__ must only be used with " + f"local file systems, not {type(filesystem)}" + ) - def validate_schemas(self): - if self._metadata is None and self._schema is None: - if self._common_metadata is not None: - self._schema = self._common_metadata.schema + # check for single fragment dataset + single_file = None + self._base_dir = None + if not isinstance(path_or_paths, list): + if _is_path_like(path_or_paths): + path_or_paths = _stringify_path(path_or_paths) + if filesystem is None: + # path might be a URI describing the FileSystem as well + try: + filesystem, path_or_paths = FileSystem.from_uri( + path_or_paths) + except 
ValueError: + filesystem = LocalFileSystem(use_mmap=memory_map) + finfo = filesystem.get_file_info(path_or_paths) + if finfo.is_file: + single_file = path_or_paths + if finfo.type == FileType.Directory: + self._base_dir = path_or_paths else: - self._schema = self._pieces[0].get_metadata().schema - elif self._schema is None: - self._schema = self._metadata.schema - - # Verify schemas are all compatible - dataset_schema = self._schema.to_arrow_schema() - # Exclude the partition columns from the schema, they are provided - # by the path, not the DatasetPiece - if self._partitions is not None: - for partition_name in self._partitions.partition_names: - if dataset_schema.get_field_index(partition_name) != -1: - field_idx = dataset_schema.get_field_index(partition_name) - dataset_schema = dataset_schema.remove(field_idx) - - for piece in self._pieces: - file_metadata = piece.get_metadata() - file_schema = file_metadata.schema.to_arrow_schema() - if not dataset_schema.equals(file_schema, check_metadata=False): - raise ValueError('Schema in {!s} was different. \n' - '{!s}\n\nvs\n\n{!s}' - .format(piece, file_schema, - dataset_schema)) + single_file = path_or_paths - def read(self, columns=None, use_threads=True, use_pandas_metadata=False): - """ - Read multiple Parquet files as a single pyarrow.Table. - - Parameters - ---------- - columns : List[str] - Names of columns to read from the file. - use_threads : bool, default True - Perform multi-threaded column reads - use_pandas_metadata : bool, default False - Passed through to each dataset piece. - - Returns - ------- - pyarrow.Table - Content of the file as a table (of columns). - - Examples - -------- - Generate an example dataset: - - >>> import pyarrow as pa - >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path='dataset_name_read', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_name_read/', - ... use_legacy_dataset=False) - - Read multiple Parquet files as a single pyarrow.Table: - - >>> dataset.read(columns=["n_legs"]) - pyarrow.Table - n_legs: int64 - ---- - n_legs: [[5],[2],[4,100],[2,4]] - """ - tables = [] - for piece in self._pieces: - table = piece.read(columns=columns, - use_threads=use_threads, - partitions=self._partitions, - use_pandas_metadata=use_pandas_metadata) - tables.append(table) - - all_data = lib.concat_tables(tables) - - if use_pandas_metadata: - # We need to ensure that this metadata is set in the Table's schema - # so that Table.to_pandas will construct pandas.DataFrame with the - # right index - common_metadata = self._get_common_pandas_metadata() - current_metadata = all_data.schema.metadata or {} - - if common_metadata and b'pandas' not in current_metadata: - all_data = all_data.replace_schema_metadata({ - b'pandas': common_metadata}) - - return all_data - - def read_pandas(self, **kwargs): - """ - Read dataset including pandas metadata, if any. Other arguments passed - through to ParquetDataset.read, see docstring for further details. - - Parameters - ---------- - **kwargs : optional - All additional options to pass to the reader. - - Returns - ------- - pyarrow.Table - Content of the file as a table (of columns). 
- - Examples - -------- - Generate an example PyArrow Table and write it to a partitioned - dataset: - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> table = pa.Table.from_pandas(df) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, 'table.parquet') - >>> dataset = pq.ParquetDataset('table.parquet', - ... use_legacy_dataset=False) - - Read dataset including pandas metadata: - - >>> dataset.read_pandas(columns=["n_legs"]) - pyarrow.Table - n_legs: int64 - ---- - n_legs: [[2,2,4,4,5,100]] - - Select pandas metadata: - - >>> dataset.read_pandas(columns=["n_legs"]).schema.pandas_metadata - {'index_columns': [{'kind': 'range', 'name': None, 'start': 0, ...} - """ - return self.read(use_pandas_metadata=True, **kwargs) - - def _get_common_pandas_metadata(self): - if self._common_metadata is None: - return None - - keyvalues = self._common_metadata.metadata - return keyvalues.get(b'pandas', None) - - def _filter(self, filters): - accepts_filter = self._partitions.filter_accepts_partition - - def one_filter_accepts(piece, filter): - return all(accepts_filter(part_key, filter, level) - for level, part_key in enumerate(piece.partition_keys)) - - def all_filters_accept(piece): - return any(all(one_filter_accepts(piece, f) for f in conjunction) - for conjunction in filters) - - self._pieces = [p for p in self._pieces if all_filters_accept(p)] - - @property - def pieces(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format( - "ParquetDataset.pieces", - " Specify 'use_legacy_dataset=False' while constructing the " - "ParquetDataset, and then use the '.fragments' attribute " - "instead."), - FutureWarning, stacklevel=2) - return self._pieces - - @property - def partitions(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format( - "ParquetDataset.partitions", - " Specify 'use_legacy_dataset=False' while constructing the " - "ParquetDataset, and then use the '.partitioning' attribute " - "instead."), - FutureWarning, stacklevel=2) - return self._partitions - - @property - def schema(self): - warnings.warn( - _DEPR_MSG.format( - "ParquetDataset.schema", - " Specify 'use_legacy_dataset=False' while constructing the " - "ParquetDataset, and then use the '.schema' attribute " - "instead (which will return an Arrow schema instead of a " - "Parquet schema)."), - FutureWarning, stacklevel=2) - return self._schema - - @property - def memory_map(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.memory_map", ""), - FutureWarning, stacklevel=2) - return self._ds_metadata.memory_map - - @property - def read_dictionary(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.read_dictionary", ""), - FutureWarning, stacklevel=2) - return self._ds_metadata.read_dictionary - - @property - def buffer_size(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.buffer_size", ""), - FutureWarning, stacklevel=2) - return self._ds_metadata.buffer_size - - _fs = property( - operator.attrgetter('_ds_metadata.fs') - ) - - @property - def fs(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format( - "ParquetDataset.fs", - " Specify 'use_legacy_dataset=False' while constructing the " - "ParquetDataset, and then use the '.filesystem' attribute " - "instead."), - FutureWarning, 
stacklevel=2) - return self._ds_metadata.fs - - @property - def metadata(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.metadata", ""), - FutureWarning, stacklevel=2) - return self._metadata - - @property - def metadata_path(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.metadata_path", ""), - FutureWarning, stacklevel=2) - return self._metadata_path - - @property - def common_metadata_path(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.common_metadata_path", ""), - FutureWarning, stacklevel=2) - return self._common_metadata_path - - _common_metadata = property( - operator.attrgetter('_ds_metadata.common_metadata') - ) - - @property - def common_metadata(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.common_metadata", ""), - FutureWarning, stacklevel=2) - return self._ds_metadata.common_metadata - - @property - def fragments(self): - """ - A list of the Dataset source fragments or pieces with absolute - file paths. To use this property set 'use_legacy_dataset=False' - while constructing ParquetDataset object. - - Examples - -------- - Generate an example dataset: - - >>> import pyarrow as pa - >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path='dataset_name_fragments', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_name_fragments/', - ... use_legacy_dataset=False) - - List the fragments: - - >>> dataset.fragments - [>> import pyarrow as pa - >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path='dataset_name_files', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_name_files/', - ... use_legacy_dataset=False) - - List the files: - - >>> dataset.files - ['dataset_name_files/year=2019/...-0.parquet', ... - """ - raise NotImplementedError( - "To use this property set 'use_legacy_dataset=False' while " - "constructing the ParquetDataset") - - @property - def filesystem(self): - """ - The filesystem type of the Dataset source. - To use this property set 'use_legacy_dataset=False' - while constructing ParquetDataset object. - """ - raise NotImplementedError( - "To use this property set 'use_legacy_dataset=False' while " - "constructing the ParquetDataset") - - @property - def partitioning(self): - """ - The partitioning of the Dataset source, if discovered. - To use this property set 'use_legacy_dataset=False' - while constructing ParquetDataset object. 
- """ - raise NotImplementedError( - "To use this property set 'use_legacy_dataset=False' while " - "constructing the ParquetDataset") - - -def _make_manifest(path_or_paths, fs, pathsep='/', metadata_nthreads=1, - open_file_func=None): - partitions = None - common_metadata_path = None - metadata_path = None - - if isinstance(path_or_paths, list) and len(path_or_paths) == 1: - # Dask passes a directory as a list of length 1 - path_or_paths = path_or_paths[0] - - if _is_path_like(path_or_paths) and fs.isdir(path_or_paths): - manifest = ParquetManifest(path_or_paths, filesystem=fs, - open_file_func=open_file_func, - pathsep=getattr(fs, "pathsep", "/"), - metadata_nthreads=metadata_nthreads) - common_metadata_path = manifest.common_metadata_path - metadata_path = manifest.metadata_path - pieces = manifest.pieces - partitions = manifest.partitions - else: - if not isinstance(path_or_paths, list): - path_or_paths = [path_or_paths] - - # List of paths - if len(path_or_paths) == 0: - raise ValueError('Must pass at least one file path') - - pieces = [] - for path in path_or_paths: - if not fs.isfile(path): - raise OSError('Passed non-file path: {}' - .format(path)) - piece = ParquetDatasetPiece._create( - path, open_file_func=open_file_func) - pieces.append(piece) - - return pieces, partitions, common_metadata_path, metadata_path - - -def _is_local_file_system(fs): - return isinstance(fs, LocalFileSystem) or isinstance( - fs, legacyfs.LocalFileSystem - ) - - -class _ParquetDatasetV2: - """ - ParquetDataset shim using the Dataset API under the hood. - - Examples - -------- - Generate an example PyArrow Table and write it to a partitioned dataset: - - >>> import pyarrow as pa - >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path='dataset_v2', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - - create a ParquetDataset object from the dataset source: - - >>> dataset = pq.ParquetDataset('dataset_v2/', use_legacy_dataset=False) - - and read the data: - - >>> dataset.read().to_pandas() - n_legs animal year - 0 5 Brittle stars 2019 - 1 2 Flamingo 2020 - 2 4 Dog 2021 - 3 100 Centipede 2021 - 4 2 Parrot 2022 - 5 4 Horse 2022 - - create a ParquetDataset object with filter: - - >>> dataset = pq.ParquetDataset('dataset_v2/', - ... filters=[('n_legs','=',4)], - ... 
use_legacy_dataset=False) - >>> dataset.read().to_pandas() - n_legs animal year - 0 4 Dog 2021 - 1 4 Horse 2022 - """ - - def __init__(self, path_or_paths, filesystem=None, *, filters=None, - partitioning="hive", read_dictionary=None, buffer_size=None, - memory_map=False, ignore_prefixes=None, pre_buffer=True, - coerce_int96_timestamp_unit=None, schema=None, - decryption_properties=None, thrift_string_size_limit=None, - thrift_container_size_limit=None, - page_checksum_verification=False, - **kwargs): - import pyarrow.dataset as ds - - # Raise error for not supported keywords - for keyword, default in [ - ("metadata", None), ("split_row_groups", False), - ("validate_schema", True), ("metadata_nthreads", None)]: - if keyword in kwargs and kwargs[keyword] is not default: - raise ValueError( - "Keyword '{0}' is not yet supported with the new " - "Dataset API".format(keyword)) - - # map format arguments - read_options = { - "pre_buffer": pre_buffer, - "coerce_int96_timestamp_unit": coerce_int96_timestamp_unit, - "thrift_string_size_limit": thrift_string_size_limit, - "thrift_container_size_limit": thrift_container_size_limit, - "page_checksum_verification": page_checksum_verification, - } - if buffer_size: - read_options.update(use_buffered_stream=True, - buffer_size=buffer_size) - if read_dictionary is not None: - read_options.update(dictionary_columns=read_dictionary) - - if decryption_properties is not None: - read_options.update(decryption_properties=decryption_properties) - - self._filter_expression = None - if filters is not None: - self._filter_expression = filters_to_expression(filters) - - # map old filesystems to new one - if filesystem is not None: - filesystem = _ensure_filesystem( - filesystem, use_mmap=memory_map) - elif filesystem is None and memory_map: - # if memory_map is specified, assume local file system (string - # path can in principle be URI for any filesystem) - filesystem = LocalFileSystem(use_mmap=memory_map) - - # This needs to be checked after _ensure_filesystem, because that - # handles the case of an fsspec LocalFileSystem - if ( - hasattr(path_or_paths, "__fspath__") and - filesystem is not None and - not _is_local_file_system(filesystem) - ): - raise TypeError( - "Path-like objects with __fspath__ must only be used with " - f"local file systems, not {type(filesystem)}" - ) - - # check for single fragment dataset - single_file = None - self._base_dir = None - if not isinstance(path_or_paths, list): - if _is_path_like(path_or_paths): - path_or_paths = _stringify_path(path_or_paths) - if filesystem is None: - # path might be a URI describing the FileSystem as well - try: - filesystem, path_or_paths = FileSystem.from_uri( - path_or_paths) - except ValueError: - filesystem = LocalFileSystem(use_mmap=memory_map) - finfo = filesystem.get_file_info(path_or_paths) - if finfo.is_file: - single_file = path_or_paths - if finfo.type == FileType.Directory: - self._base_dir = path_or_paths - else: - single_file = path_or_paths - - parquet_format = ds.ParquetFileFormat(**read_options) + parquet_format = ds.ParquetFileFormat(**read_options) if single_file is not None: fragment = parquet_format.make_fragment(single_file, filesystem) @@ -2540,12 +1357,7 @@ def __init__(self, path_or_paths, filesystem=None, *, filters=None, ignore_prefixes=ignore_prefixes) def equals(self, other): - if isinstance(other, ParquetDataset): - raise TypeError( - "`other` must be an instance of ParquetDataset constructed " - "with `use_legacy_dataset=False`" - ) - if not isinstance(other, 
_ParquetDatasetV2): + if not isinstance(other, ParquetDataset): raise TypeError('`other` must be an instance of ParquetDataset') return (self.schema == other.schema and @@ -2576,10 +1388,8 @@ def schema(self): ... "Brittle stars", "Centipede"]}) >>> import pyarrow.parquet as pq >>> pq.write_to_dataset(table, root_path='dataset_v2_schema', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_v2_schema/', - ... use_legacy_dataset=False) + ... partition_cols=['year']) + >>> dataset = pq.ParquetDataset('dataset_v2_schema/') Read the schema: @@ -2598,8 +1408,7 @@ def read(self, columns=None, use_threads=True, use_pandas_metadata=False): ---------- columns : List[str] Names of columns to read from the dataset. The partition fields - are not automatically included (in contrast to when setting - ``use_legacy_dataset=True``). + are not automatically included. use_threads : bool, default True Perform multi-threaded column reads. use_pandas_metadata : bool, default False @@ -2622,10 +1431,8 @@ def read(self, columns=None, use_threads=True, use_pandas_metadata=False): ... "Brittle stars", "Centipede"]}) >>> import pyarrow.parquet as pq >>> pq.write_to_dataset(table, root_path='dataset_v2_read', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_v2_read/', - ... use_legacy_dataset=False) + ... partition_cols=['year']) + >>> dataset = pq.ParquetDataset('dataset_v2_read/') Read the dataset: @@ -2694,7 +1501,12 @@ def _get_common_pandas_metadata(self): def read_pandas(self, **kwargs): """ Read dataset including pandas metadata, if any. Other arguments passed - through to ParquetDataset.read, see docstring for further details. + through to :func:`read`, see docstring for further details. + + Parameters + ---------- + **kwargs : optional + Additional options for :func:`read` Examples -------- @@ -2709,8 +1521,7 @@ def read_pandas(self, **kwargs): >>> table = pa.Table.from_pandas(df) >>> import pyarrow.parquet as pq >>> pq.write_table(table, 'table_V2.parquet') - >>> dataset = pq.ParquetDataset('table_V2.parquet', - ... use_legacy_dataset=False) + >>> dataset = pq.ParquetDataset('table_V2.parquet') Read the dataset with pandas metadata: @@ -2725,14 +1536,6 @@ def read_pandas(self, **kwargs): """ return self.read(use_pandas_metadata=True, **kwargs) - @property - def pieces(self): - warnings.warn( - _DEPR_MSG.format("ParquetDataset.pieces", - " Use the '.fragments' attribute instead"), - FutureWarning, stacklevel=2) - return list(self._dataset.get_fragments()) - @property def fragments(self): """ @@ -2750,10 +1553,8 @@ def fragments(self): ... "Brittle stars", "Centipede"]}) >>> import pyarrow.parquet as pq >>> pq.write_to_dataset(table, root_path='dataset_v2_fragments', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_v2_fragments/', - ... use_legacy_dataset=False) + ... partition_cols=['year']) + >>> dataset = pq.ParquetDataset('dataset_v2_fragments/') List the fragments: @@ -2778,10 +1579,8 @@ def files(self): ... "Brittle stars", "Centipede"]}) >>> import pyarrow.parquet as pq >>> pq.write_to_dataset(table, root_path='dataset_v2_files', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_v2_files/', - ... use_legacy_dataset=False) + ... partition_cols=['year']) + >>> dataset = pq.ParquetDataset('dataset_v2_files/') List the files: @@ -2822,8 +1621,6 @@ def partitioning(self): no columns. 
use_threads : bool, default True Perform multi-threaded column reads. -metadata : FileMetaData - If separately computed schema : Schema, optional Optionally provide the Schema for the parquet dataset, in which case it will not be inferred from the source. @@ -2836,30 +1633,21 @@ def partitioning(self): Rows which do not match the filter predicate will be removed from scanned data. Partition keys embedded in a nested directory structure will be exploited to avoid loading files at all if they contain no matching rows. - If `use_legacy_dataset` is True, filters can only reference partition - keys and only a hive-style directory structure is supported. When - setting `use_legacy_dataset` to False, also within-file level filtering - and different partitioning schemes are supported. + Within-file level filtering and different partitioning schemes are supported. {3} -use_legacy_dataset : bool, default False - By default, `read_table` uses the new Arrow Datasets API since - pyarrow 1.0.0. Among other things, this allows to pass `filters` - for all columns and not only the partition keys, enables - different partitioning schemes, etc. - Set to True to use the legacy behaviour (this option is deprecated, - and the legacy implementation will be removed in a future version). +use_legacy_dataset : bool, optional + Deprecated and has no effect from PyArrow version 15.0.0. ignore_prefixes : list, optional Files matching any of these prefixes will be ignored by the - discovery process if use_legacy_dataset=False. + discovery process. This is matched to the basename of a path. By default this is ['.', '_']. Note that discovery happens only if a directory is passed as source. pre_buffer : bool, default True Coalesce and issue file reads in parallel to improve performance on high-latency filesystems (e.g. S3). If True, Arrow will use a - background I/O thread pool. This option is only supported for - use_legacy_dataset=False. If using a filesystem layer that itself + background I/O thread pool. If using a filesystem layer that itself performs readahead (e.g. fsspec's S3FS), disable readahead for best results. 
coerce_int96_timestamp_unit : str, default None @@ -2968,129 +1756,78 @@ def partitioning(self): """ -def read_table(source, *, columns=None, use_threads=True, metadata=None, +def read_table(source, *, columns=None, use_threads=True, schema=None, use_pandas_metadata=False, read_dictionary=None, memory_map=False, buffer_size=0, partitioning="hive", - filesystem=None, filters=None, use_legacy_dataset=False, + filesystem=None, filters=None, use_legacy_dataset=None, ignore_prefixes=None, pre_buffer=True, coerce_int96_timestamp_unit=None, decryption_properties=None, thrift_string_size_limit=None, thrift_container_size_limit=None, page_checksum_verification=False): - if not use_legacy_dataset: - if metadata is not None: + + if use_legacy_dataset is not None: + warnings.warn( + "Passing 'use_legacy_dataset' is deprecated as of pyarrow 15.0.0 " + "and will be removed in a future version.", + FutureWarning, stacklevel=2) + + try: + dataset = ParquetDataset( + source, + schema=schema, + filesystem=filesystem, + partitioning=partitioning, + memory_map=memory_map, + read_dictionary=read_dictionary, + buffer_size=buffer_size, + filters=filters, + ignore_prefixes=ignore_prefixes, + pre_buffer=pre_buffer, + coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, + thrift_string_size_limit=thrift_string_size_limit, + thrift_container_size_limit=thrift_container_size_limit, + page_checksum_verification=page_checksum_verification, + ) + except ImportError: + # fall back on ParquetFile for simple cases when pyarrow.dataset + # module is not available + if filters is not None: raise ValueError( - "The 'metadata' keyword is no longer supported with the new " - "datasets-based implementation. Specify " - "'use_legacy_dataset=True' to temporarily recover the old " - "behaviour." 
- ) - try: - dataset = _ParquetDatasetV2( - source, - schema=schema, - filesystem=filesystem, - partitioning=partitioning, - memory_map=memory_map, - read_dictionary=read_dictionary, - buffer_size=buffer_size, - filters=filters, - ignore_prefixes=ignore_prefixes, - pre_buffer=pre_buffer, - coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, - thrift_string_size_limit=thrift_string_size_limit, - thrift_container_size_limit=thrift_container_size_limit, - page_checksum_verification=page_checksum_verification, + "the 'filters' keyword is not supported when the " + "pyarrow.dataset module is not available" ) - except ImportError: - # fall back on ParquetFile for simple cases when pyarrow.dataset - # module is not available - if filters is not None: - raise ValueError( - "the 'filters' keyword is not supported when the " - "pyarrow.dataset module is not available" - ) - if partitioning != "hive": - raise ValueError( - "the 'partitioning' keyword is not supported when the " - "pyarrow.dataset module is not available" - ) - if schema is not None: - raise ValueError( - "the 'schema' argument is not supported when the " - "pyarrow.dataset module is not available" - ) - filesystem, path = _resolve_filesystem_and_path(source, filesystem) - if filesystem is not None: - source = filesystem.open_input_file(path) - # TODO test that source is not a directory or a list - dataset = ParquetFile( - source, metadata=metadata, read_dictionary=read_dictionary, - memory_map=memory_map, buffer_size=buffer_size, - pre_buffer=pre_buffer, - coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, - decryption_properties=decryption_properties, - thrift_string_size_limit=thrift_string_size_limit, - thrift_container_size_limit=thrift_container_size_limit, - page_checksum_verification=page_checksum_verification, + if partitioning != "hive": + raise ValueError( + "the 'partitioning' keyword is not supported when the " + "pyarrow.dataset module is not available" ) - - return dataset.read(columns=columns, use_threads=use_threads, - use_pandas_metadata=use_pandas_metadata) - - warnings.warn( - "Passing 'use_legacy_dataset=True' to get the legacy behaviour is " - "deprecated as of pyarrow 8.0.0, and the legacy implementation will " - "be removed in a future version.", - FutureWarning, stacklevel=2) - - if ignore_prefixes is not None: - raise ValueError( - "The 'ignore_prefixes' keyword is only supported when " - "use_legacy_dataset=False") - - if page_checksum_verification: - raise ValueError( - "The 'page_checksum_verification' keyword is only supported when " - "use_legacy_dataset=False") - - if schema is not None: - raise ValueError( - "The 'schema' argument is only supported when " - "use_legacy_dataset=False") - - if _is_path_like(source): - with warnings.catch_warnings(): - # Suppress second warning from ParquetDataset constructor - warnings.filterwarnings( - "ignore", "Passing 'use_legacy_dataset", FutureWarning) - pf = ParquetDataset( - source, metadata=metadata, memory_map=memory_map, - read_dictionary=read_dictionary, - buffer_size=buffer_size, - filesystem=filesystem, filters=filters, - partitioning=partitioning, - coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, - use_legacy_dataset=True, + if schema is not None: + raise ValueError( + "the 'schema' argument is not supported when the " + "pyarrow.dataset module is not available" ) - else: - pf = ParquetFile( - source, metadata=metadata, - read_dictionary=read_dictionary, - memory_map=memory_map, - buffer_size=buffer_size, + filesystem, path = 
_resolve_filesystem_and_path(source, filesystem) + if filesystem is not None: + source = filesystem.open_input_file(path) + # TODO test that source is not a directory or a list + dataset = ParquetFile( + source, read_dictionary=read_dictionary, + memory_map=memory_map, buffer_size=buffer_size, + pre_buffer=pre_buffer, coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, - decryption_properties=decryption_properties + decryption_properties=decryption_properties, + thrift_string_size_limit=thrift_string_size_limit, + thrift_container_size_limit=thrift_container_size_limit, + page_checksum_verification=page_checksum_verification, ) - return pf.read(columns=columns, use_threads=use_threads, - use_pandas_metadata=use_pandas_metadata) + return dataset.read(columns=columns, use_threads=use_threads, + use_pandas_metadata=use_pandas_metadata) -read_table.__doc__ = _read_table_docstring.format( - """Read a Table from Parquet format -Note: starting with pyarrow 1.0, the default for `use_legacy_dataset` is -switched to False.""", +read_table.__doc__ = _read_table_docstring.format( + """Read a Table from Parquet format""", "\n".join(("""use_pandas_metadata : bool, default False If True and file has custom pandas schema metadata, ensure that index columns are also loaded.""", _read_docstring_common)), @@ -3233,23 +1970,13 @@ def write_table(table, where, row_group_size=None, version='2.6', """.format(_parquet_writer_arg_docs, _write_table_example) -def _mkdir_if_not_exists(fs, path): - if fs._isfilestore() and not fs.exists(path): - try: - fs.mkdir(path) - except OSError: - assert fs.exists(path) - - def write_to_dataset(table, root_path, partition_cols=None, - partition_filename_cb=None, filesystem=None, - use_legacy_dataset=None, schema=None, - partitioning=None, basename_template=None, - use_threads=None, file_visitor=None, - existing_data_behavior=None, + filesystem=None, use_legacy_dataset=None, + schema=None, partitioning=None, + basename_template=None, use_threads=None, + file_visitor=None, existing_data_behavior=None, **kwargs): - """Wrapper around dataset.write_dataset (when use_legacy_dataset=False) or - parquet.write_table (when use_legacy_dataset=True) for writing a Table to + """Wrapper around dataset.write_dataset for writing a Table to Parquet format by partitions. For each combination of partition columns and values, a subdirectories are created in the following @@ -3271,45 +1998,31 @@ def write_to_dataset(table, root_path, partition_cols=None, ---------- table : pyarrow.Table root_path : str, pathlib.Path - The root directory of the dataset + The root directory of the dataset. partition_cols : list, Column names by which to partition the dataset. - Columns are partitioned in the order they are given - partition_filename_cb : callable, - A callback function that takes the partition key(s) as an argument - and allow you to override the partition filename. If nothing is - passed, the filename will consist of a uuid. - This option is only supported for use_legacy_dataset=True. - When use_legacy_dataset=None and this option is specified, - use_legacy_dataset will be set to True. + Columns are partitioned in the order they are given. filesystem : FileSystem, default None If nothing passed, will be inferred based on path. Path will try to be found in the local on-disk filesystem otherwise it will be parsed as an URI to determine the filesystem. - use_legacy_dataset : bool - Default is False. 
Set to True to use the legacy behaviour - (this option is deprecated, and the legacy implementation will be - removed in a future version). The legacy implementation still - supports the `partition_filename_cb` keyword but is less efficient - when using partition columns. + use_legacy_dataset : bool, optional + Deprecated and has no effect from PyArrow version 15.0.0. schema : Schema, optional - This option is only supported for use_legacy_dataset=False. + This Schema of the dataset. partitioning : Partitioning or list[str], optional The partitioning scheme specified with the ``pyarrow.dataset.partitioning()`` function or a list of field names. When providing a list of field names, you can use ``partitioning_flavor`` to drive which partitioning type should be used. - This option is only supported for use_legacy_dataset=False. basename_template : str, optional A template string used to generate basenames of written data files. The token '{i}' will be replaced with an automatically incremented integer. If not specified, it defaults to "guid-{i}.parquet". - This option is only supported for use_legacy_dataset=False. use_threads : bool, default True Write files in parallel. If enabled, then maximum parallelism will be used determined by the number of available CPU cores. - This option is only supported for use_legacy_dataset=False. file_visitor : function If set, this function will be called with a WrittenFile instance for each file created during the call. This object will have both @@ -3330,7 +2043,6 @@ def write_to_dataset(table, root_path, partition_cols=None, def file_visitor(written_file): visited_paths.append(written_file.path) - This option is only supported for use_legacy_dataset=False. existing_data_behavior : 'overwrite_or_ignore' | 'error' | \ 'delete_matching' Controls how the dataset will handle data that already exists in @@ -3348,15 +2060,12 @@ def file_visitor(written_file): dataset. The first time each partition directory is encountered the entire directory will be deleted. This allows you to overwrite old partitions completely. - This option is only supported for use_legacy_dataset=False. **kwargs : dict, - When use_legacy_dataset=False, used as additional kwargs for - `dataset.write_dataset` function for matching kwargs, and remainder to - `ParquetFileFormat.make_write_options`. See the docstring - of `write_table` and `dataset.write_dataset` for the available options. - When use_legacy_dataset=True, used as additional kwargs for - `parquet.write_table` function (See docstring for `write_table` - or `ParquetWriter` for more information). + Used as additional kwargs for :func:`pyarrow.dataset.write_dataset` + function for matching kwargs, and remainder to + :func:`pyarrow.dataset.ParquetFileFormat.make_write_options`. + See the docstring of :func:`write_table` and + :func:`pyarrow.dataset.write_dataset` for the available options. Using `metadata_collector` in kwargs allows one to collect the file metadata instances of dataset pieces. The file paths in the ColumnChunkMetaData will be set relative to `root_path`. @@ -3376,194 +2085,79 @@ def file_visitor(written_file): >>> import pyarrow.parquet as pq >>> pq.write_to_dataset(table, root_path='dataset_name_3', ... partition_cols=['year']) - >>> pq.ParquetDataset('dataset_name_3', use_legacy_dataset=False).files + >>> pq.ParquetDataset('dataset_name_3').files ['dataset_name_3/year=2019/...-0.parquet', ... 
Write a single Parquet file into the root folder: >>> pq.write_to_dataset(table, root_path='dataset_name_4') - >>> pq.ParquetDataset('dataset_name_4/', use_legacy_dataset=False).files + >>> pq.ParquetDataset('dataset_name_4/').files ['dataset_name_4/...-0.parquet'] """ - # Choose the implementation - if use_legacy_dataset is None: - # if partition_filename_cb is specified -> - # default to the old implementation - if partition_filename_cb: - use_legacy_dataset = True - # otherwise the default is False - else: - use_legacy_dataset = False + if use_legacy_dataset is not None: + warnings.warn( + "Passing 'use_legacy_dataset' is deprecated as of pyarrow 15.0.0 " + "and will be removed in a future version.", + FutureWarning, stacklevel=2) + + metadata_collector = kwargs.pop('metadata_collector', None) # Check for conflicting keywords - msg_confl_0 = ( - "The '{0}' argument is not supported by use_legacy_dataset={2}. " - "Use only '{1}' instead." - ) - msg_confl_1 = ( - "The '{1}' argument is not supported by use_legacy_dataset={2}. " + msg_confl = ( + "The '{1}' argument is not supported. " "Use only '{0}' instead." ) - msg_confl = msg_confl_0 if use_legacy_dataset else msg_confl_1 - if partition_filename_cb is not None and basename_template is not None: - raise ValueError(msg_confl.format("basename_template", - "partition_filename_cb", - use_legacy_dataset)) - if partition_cols is not None and partitioning is not None: raise ValueError(msg_confl.format("partitioning", - "partition_cols", - use_legacy_dataset)) + "partition_cols")) - metadata_collector = kwargs.pop('metadata_collector', None) if metadata_collector is not None and file_visitor is not None: raise ValueError(msg_confl.format("file_visitor", - "metadata_collector", - use_legacy_dataset)) + "metadata_collector")) - # New dataset implementation - if not use_legacy_dataset: - import pyarrow.dataset as ds + import pyarrow.dataset as ds - # extract write_dataset specific options - # reset assumed to go to make_write_options - write_dataset_kwargs = dict() - for key in inspect.signature(ds.write_dataset).parameters: - if key in kwargs: - write_dataset_kwargs[key] = kwargs.pop(key) - write_dataset_kwargs['max_rows_per_group'] = kwargs.pop( - 'row_group_size', kwargs.pop("chunk_size", None) - ) - # raise for unsupported keywords - msg = ( - "The '{}' argument is not supported with the new dataset " - "implementation." 
- ) - if metadata_collector is not None: - def file_visitor(written_file): - metadata_collector.append(written_file.metadata) - if partition_filename_cb is not None: - raise ValueError(msg.format("partition_filename_cb")) + # extract write_dataset specific options + # reset assumed to go to make_write_options + write_dataset_kwargs = dict() + for key in inspect.signature(ds.write_dataset).parameters: + if key in kwargs: + write_dataset_kwargs[key] = kwargs.pop(key) + write_dataset_kwargs['max_rows_per_group'] = kwargs.pop( + 'row_group_size', kwargs.pop("chunk_size", None) + ) - # map format arguments - parquet_format = ds.ParquetFileFormat() - write_options = parquet_format.make_write_options(**kwargs) + if metadata_collector is not None: + def file_visitor(written_file): + metadata_collector.append(written_file.metadata) - # map old filesystems to new one - if filesystem is not None: - filesystem = _ensure_filesystem(filesystem) - - if partition_cols: - part_schema = table.select(partition_cols).schema - partitioning = ds.partitioning(part_schema, flavor="hive") - - if basename_template is None: - basename_template = guid() + '-{i}.parquet' - - if existing_data_behavior is None: - existing_data_behavior = 'overwrite_or_ignore' - - ds.write_dataset( - table, root_path, filesystem=filesystem, - format=parquet_format, file_options=write_options, schema=schema, - partitioning=partitioning, use_threads=use_threads, - file_visitor=file_visitor, - basename_template=basename_template, - existing_data_behavior=existing_data_behavior, - **write_dataset_kwargs) - return - - # warnings and errors when using legacy implementation - if use_legacy_dataset: - warnings.warn( - "Passing 'use_legacy_dataset=True' to get the legacy behaviour is " - "deprecated as of pyarrow 8.0.0, and the legacy implementation " - "will be removed in a future version.", - FutureWarning, stacklevel=2) - msg2 = ( - "The '{}' argument is not supported with the legacy " - "implementation. To use this argument specify " - "'use_legacy_dataset=False' while constructing the " - "ParquetDataset." - ) - if schema is not None: - raise ValueError(msg2.format("schema")) - if partitioning is not None: - raise ValueError(msg2.format("partitioning")) - if use_threads is not None: - raise ValueError(msg2.format("use_threads")) - if file_visitor is not None: - raise ValueError(msg2.format("file_visitor")) - if existing_data_behavior is not None: - raise ValueError(msg2.format("existing_data_behavior")) - if basename_template is not None: - raise ValueError(msg2.format("basename_template")) - if partition_filename_cb is not None: - warnings.warn( - _DEPR_MSG.format("partition_filename_cb", " Specify " - "'use_legacy_dataset=False' while constructing " - "the ParquetDataset, and then use the " - "'basename_template' parameter instead. 
For " - "usage see `pyarrow.dataset.write_dataset`"), - FutureWarning, stacklevel=2) + # map format arguments + parquet_format = ds.ParquetFileFormat() + write_options = parquet_format.make_write_options(**kwargs) - # Legacy implementation - fs, root_path = legacyfs.resolve_filesystem_and_path(root_path, filesystem) - - _mkdir_if_not_exists(fs, root_path) - - if partition_cols is not None and len(partition_cols) > 0: - df = table.to_pandas() - partition_keys = [df[col] for col in partition_cols] - data_df = df.drop(partition_cols, axis='columns') - data_cols = df.columns.drop(partition_cols) - if len(data_cols) == 0: - raise ValueError('No data left to save outside partition columns') - - subschema = table.schema - - # ARROW-2891: Ensure the output_schema is preserved when writing a - # partitioned dataset - for col in table.schema.names: - if col in partition_cols: - subschema = subschema.remove(subschema.get_field_index(col)) - - # ARROW-17829: avoid deprecation warnings for df.groupby - # https://github.com/pandas-dev/pandas/issues/42795 - if len(partition_keys) == 1: - partition_keys = partition_keys[0] - - for keys, subgroup in data_df.groupby(partition_keys, observed=True): - if not isinstance(keys, tuple): - keys = (keys,) - subdir = '/'.join( - ['{colname}={value}'.format(colname=name, value=val) - for name, val in zip(partition_cols, keys)]) - subtable = pa.Table.from_pandas(subgroup, schema=subschema, - safe=False) - _mkdir_if_not_exists(fs, '/'.join([root_path, subdir])) - if partition_filename_cb: - outfile = partition_filename_cb(keys) - else: - outfile = guid() + '.parquet' - relative_path = '/'.join([subdir, outfile]) - full_path = '/'.join([root_path, relative_path]) - with fs.open(full_path, 'wb') as f: - write_table(subtable, f, metadata_collector=metadata_collector, - **kwargs) - if metadata_collector is not None: - metadata_collector[-1].set_file_path(relative_path) - else: - if partition_filename_cb: - outfile = partition_filename_cb(None) - else: - outfile = guid() + '.parquet' - full_path = '/'.join([root_path, outfile]) - with fs.open(full_path, 'wb') as f: - write_table(table, f, metadata_collector=metadata_collector, - **kwargs) - if metadata_collector is not None: - metadata_collector[-1].set_file_path(outfile) + # map old filesystems to new one + if filesystem is not None: + filesystem = _ensure_filesystem(filesystem) + + if partition_cols: + part_schema = table.select(partition_cols).schema + partitioning = ds.partitioning(part_schema, flavor="hive") + + if basename_template is None: + basename_template = guid() + '-{i}.parquet' + + if existing_data_behavior is None: + existing_data_behavior = 'overwrite_or_ignore' + + ds.write_dataset( + table, root_path, filesystem=filesystem, + format=parquet_format, file_options=write_options, schema=schema, + partitioning=partitioning, use_threads=use_threads, + file_visitor=file_visitor, + basename_template=basename_template, + existing_data_behavior=existing_data_behavior, + **write_dataset_kwargs) + return def write_metadata(schema, where, metadata_collector=None, filesystem=None, @@ -3741,15 +2335,11 @@ def read_schema(where, memory_map=False, decryption_properties=None, "FileEncryptionProperties", "FileMetaData", "ParquetDataset", - "ParquetDatasetPiece", "ParquetFile", "ParquetLogicalType", - "ParquetManifest", - "ParquetPartitions", "ParquetReader", "ParquetSchema", "ParquetWriter", - "PartitionSet", "RowGroupMetaData", "SortingColumn", "Statistics", diff --git a/python/pyarrow/tests/parquet/__init__.py 
b/python/pyarrow/tests/parquet/__init__.py index 4c4e8240b8736..d08d67d2860f4 100644 --- a/python/pyarrow/tests/parquet/__init__.py +++ b/python/pyarrow/tests/parquet/__init__.py @@ -21,7 +21,4 @@ # Ignore these with pytest ... -m 'not parquet' pytestmark = [ pytest.mark.parquet, - pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning" - ), ] diff --git a/python/pyarrow/tests/parquet/common.py b/python/pyarrow/tests/parquet/common.py index 4401d3ca6bb75..8365ed5b28543 100644 --- a/python/pyarrow/tests/parquet/common.py +++ b/python/pyarrow/tests/parquet/common.py @@ -18,31 +18,10 @@ import io import numpy as np -import pytest import pyarrow as pa from pyarrow.tests import util -legacy_filter_mark = pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy:FutureWarning" -) - -parametrize_legacy_dataset = pytest.mark.parametrize( - "use_legacy_dataset", - [pytest.param(True, marks=legacy_filter_mark), - pytest.param(False, marks=pytest.mark.dataset)] -) -parametrize_legacy_dataset_not_supported = pytest.mark.parametrize( - "use_legacy_dataset", - [pytest.param(True, marks=legacy_filter_mark), - pytest.param(False, marks=pytest.mark.skip)] -) -parametrize_legacy_dataset_fixed = pytest.mark.parametrize( - "use_legacy_dataset", - [pytest.param(True, marks=[pytest.mark.xfail, legacy_filter_mark]), - pytest.param(False, marks=pytest.mark.dataset)] -) - def _write_table(table, path, **kwargs): # So we see the ImportError somewhere @@ -65,19 +44,18 @@ def _read_table(*args, **kwargs): def _roundtrip_table(table, read_table_kwargs=None, - write_table_kwargs=None, use_legacy_dataset=False): + write_table_kwargs=None): read_table_kwargs = read_table_kwargs or {} write_table_kwargs = write_table_kwargs or {} writer = pa.BufferOutputStream() _write_table(table, writer, **write_table_kwargs) reader = pa.BufferReader(writer.getvalue()) - return _read_table(reader, use_legacy_dataset=use_legacy_dataset, - **read_table_kwargs) + return _read_table(reader, **read_table_kwargs) def _check_roundtrip(table, expected=None, read_table_kwargs=None, - use_legacy_dataset=False, **write_table_kwargs): + **write_table_kwargs): if expected is None: expected = table @@ -85,20 +63,17 @@ def _check_roundtrip(table, expected=None, read_table_kwargs=None, # intentionally check twice result = _roundtrip_table(table, read_table_kwargs=read_table_kwargs, - write_table_kwargs=write_table_kwargs, - use_legacy_dataset=use_legacy_dataset) + write_table_kwargs=write_table_kwargs) assert result.equals(expected) result = _roundtrip_table(result, read_table_kwargs=read_table_kwargs, - write_table_kwargs=write_table_kwargs, - use_legacy_dataset=use_legacy_dataset) + write_table_kwargs=write_table_kwargs) assert result.equals(expected) -def _roundtrip_pandas_dataframe(df, write_kwargs, use_legacy_dataset=False): +def _roundtrip_pandas_dataframe(df, write_kwargs): table = pa.Table.from_pandas(df) result = _roundtrip_table( - table, write_table_kwargs=write_kwargs, - use_legacy_dataset=use_legacy_dataset) + table, write_table_kwargs=write_kwargs) return result.to_pandas() diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py index 83e6ebeb7a1fc..3c867776ac052 100644 --- a/python/pyarrow/tests/parquet/test_basic.py +++ b/python/pyarrow/tests/parquet/test_basic.py @@ -28,7 +28,6 @@ from pyarrow.filesystem import LocalFileSystem, FileSystem from pyarrow.tests import util from pyarrow.tests.parquet.common import (_check_roundtrip, _roundtrip_table, - 
parametrize_legacy_dataset, _test_dataframe) try: @@ -63,21 +62,18 @@ def test_parquet_invalid_version(tempdir): data_page_version="2.2") -@parametrize_legacy_dataset -def test_set_data_page_size(use_legacy_dataset): +def test_set_data_page_size(): arr = pa.array([1, 2, 3] * 100000) t = pa.Table.from_arrays([arr], names=['f0']) # 128K, 512K page_sizes = [2 << 16, 2 << 18] for target_page_size in page_sizes: - _check_roundtrip(t, data_page_size=target_page_size, - use_legacy_dataset=use_legacy_dataset) + _check_roundtrip(t, data_page_size=target_page_size) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_set_write_batch_size(use_legacy_dataset): +def test_set_write_batch_size(): df = _test_dataframe(100) table = pa.Table.from_pandas(df, preserve_index=False) @@ -87,8 +83,7 @@ def test_set_write_batch_size(use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_set_dictionary_pagesize_limit(use_legacy_dataset): +def test_set_dictionary_pagesize_limit(): df = _test_dataframe(100) table = pa.Table.from_pandas(df, preserve_index=False) @@ -101,8 +96,7 @@ def test_set_dictionary_pagesize_limit(use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_chunked_table_write(use_legacy_dataset): +def test_chunked_table_write(): # ARROW-232 tables = [] batch = pa.RecordBatch.from_pandas(alltypes_sample(size=10)) @@ -116,66 +110,56 @@ def test_chunked_table_write(use_legacy_dataset): for table in tables: _check_roundtrip( table, version='2.6', - use_legacy_dataset=use_legacy_dataset, data_page_version=data_page_version, use_dictionary=use_dictionary) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_memory_map(tempdir, use_legacy_dataset): +def test_memory_map(tempdir): df = alltypes_sample(size=10) table = pa.Table.from_pandas(df) _check_roundtrip(table, read_table_kwargs={'memory_map': True}, - version='2.6', use_legacy_dataset=use_legacy_dataset) + version='2.6') filename = str(tempdir / 'tmp_file') with open(filename, 'wb') as f: _write_table(table, f, version='2.6') - table_read = pq.read_pandas(filename, memory_map=True, - use_legacy_dataset=use_legacy_dataset) + table_read = pq.read_pandas(filename, memory_map=True) assert table_read.equals(table) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_enable_buffered_stream(tempdir, use_legacy_dataset): +def test_enable_buffered_stream(tempdir): df = alltypes_sample(size=10) table = pa.Table.from_pandas(df) _check_roundtrip(table, read_table_kwargs={'buffer_size': 1025}, - version='2.6', use_legacy_dataset=use_legacy_dataset) + version='2.6') filename = str(tempdir / 'tmp_file') with open(filename, 'wb') as f: _write_table(table, f, version='2.6') - table_read = pq.read_pandas(filename, buffer_size=4096, - use_legacy_dataset=use_legacy_dataset) + table_read = pq.read_pandas(filename, buffer_size=4096) assert table_read.equals(table) -@parametrize_legacy_dataset -def test_special_chars_filename(tempdir, use_legacy_dataset): +def test_special_chars_filename(tempdir): table = pa.Table.from_arrays([pa.array([42])], ["ints"]) filename = "foo # bar" path = tempdir / filename assert not path.exists() _write_table(table, str(path)) assert path.exists() - table_read = _read_table(str(path), use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(str(path)) assert table_read.equals(table) -@parametrize_legacy_dataset -def test_invalid_source(use_legacy_dataset): +def test_invalid_source(): # Test that we provide an helpful error message pointing out # that None wasn't expected 
when trying to open a Parquet None file. - # - # Depending on use_legacy_dataset the message changes slightly - # but in both cases it should point out that None wasn't expected. with pytest.raises(TypeError, match="None"): - pq.read_table(None, use_legacy_dataset=use_legacy_dataset) + pq.read_table(None) with pytest.raises(TypeError, match="None"): pq.ParquetFile(None) @@ -193,8 +177,7 @@ def test_file_with_over_int16_max_row_groups(): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_empty_table_roundtrip(use_legacy_dataset): +def test_empty_table_roundtrip(): df = alltypes_sample(size=10) # Create a non-empty table to infer the types correctly, then slice to 0 @@ -206,19 +189,17 @@ def test_empty_table_roundtrip(use_legacy_dataset): assert table.schema.field('null').type == pa.null() assert table.schema.field('null_list').type == pa.list_(pa.null()) _check_roundtrip( - table, version='2.6', use_legacy_dataset=use_legacy_dataset) + table, version='2.6') @pytest.mark.pandas -@parametrize_legacy_dataset -def test_empty_table_no_columns(use_legacy_dataset): +def test_empty_table_no_columns(): df = pd.DataFrame() empty = pa.Table.from_pandas(df, preserve_index=False) - _check_roundtrip(empty, use_legacy_dataset=use_legacy_dataset) + _check_roundtrip(empty) -@parametrize_legacy_dataset -def test_write_nested_zero_length_array_chunk_failure(use_legacy_dataset): +def test_write_nested_zero_length_array_chunk_failure(): # Bug report in ARROW-3792 cols = OrderedDict( int32=pa.int32(), @@ -243,17 +224,16 @@ def test_write_nested_zero_length_array_chunk_failure(use_legacy_dataset): my_batches = [pa.RecordBatch.from_arrays(batch, schema=pa.schema(cols)) for batch in my_arrays] tbl = pa.Table.from_batches(my_batches, pa.schema(cols)) - _check_roundtrip(tbl, use_legacy_dataset=use_legacy_dataset) + _check_roundtrip(tbl) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_multiple_path_types(tempdir, use_legacy_dataset): +def test_multiple_path_types(tempdir): # Test compatibility with PEP 519 path-like objects path = tempdir / 'zzz.parquet' df = pd.DataFrame({'x': np.arange(10, dtype=np.int64)}) _write_table(df, path) - table_read = _read_table(path, use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(path) df_read = table_read.to_pandas() tm.assert_frame_equal(df, df_read) @@ -261,13 +241,12 @@ def test_multiple_path_types(tempdir, use_legacy_dataset): path = str(tempdir) + 'zzz.parquet' df = pd.DataFrame({'x': np.arange(10, dtype=np.int64)}) _write_table(df, path) - table_read = _read_table(path, use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(path) df_read = table_read.to_pandas() tm.assert_frame_equal(df, df_read) -@parametrize_legacy_dataset -def test_fspath(tempdir, use_legacy_dataset): +def test_fspath(tempdir): # ARROW-12472 support __fspath__ objects without using str() path = tempdir / "test.parquet" table = pa.table({"a": [1, 2, 3]}) @@ -275,9 +254,7 @@ def test_fspath(tempdir, use_legacy_dataset): fs_protocol_obj = util.FSProtocolClass(path) - result = _read_table( - fs_protocol_obj, use_legacy_dataset=use_legacy_dataset - ) + result = _read_table(fs_protocol_obj) assert result.equals(table) # combined with non-local filesystem raises @@ -285,15 +262,11 @@ def test_fspath(tempdir, use_legacy_dataset): _read_table(fs_protocol_obj, filesystem=FileSystem()) -@pytest.mark.dataset -@parametrize_legacy_dataset @pytest.mark.parametrize("filesystem", [ None, fs.LocalFileSystem(), LocalFileSystem._get_instance() ]) @pytest.mark.parametrize("name", 
("data.parquet", "例.parquet")) -def test_relative_paths(tempdir, use_legacy_dataset, filesystem, name): - if use_legacy_dataset and isinstance(filesystem, fs.FileSystem): - pytest.skip("Passing new filesystem not supported for legacy reader") +def test_relative_paths(tempdir, filesystem, name): # reading and writing from relative paths table = pa.table({"a": [1, 2, 3]}) path = tempdir / name @@ -301,8 +274,7 @@ def test_relative_paths(tempdir, use_legacy_dataset, filesystem, name): # reading pq.write_table(table, str(path)) with util.change_cwd(tempdir): - result = pq.read_table(name, filesystem=filesystem, - use_legacy_dataset=use_legacy_dataset) + result = pq.read_table(name, filesystem=filesystem) assert result.equals(table) path.unlink() @@ -334,24 +306,21 @@ def seek(self, *args): pq.read_table(BogusFile(b"")) -@parametrize_legacy_dataset -def test_parquet_read_from_buffer(tempdir, use_legacy_dataset): +def test_parquet_read_from_buffer(tempdir): # reading from a buffer from python's open() table = pa.table({"a": [1, 2, 3]}) pq.write_table(table, str(tempdir / "data.parquet")) with open(str(tempdir / "data.parquet"), "rb") as f: - result = pq.read_table(f, use_legacy_dataset=use_legacy_dataset) + result = pq.read_table(f) assert result.equals(table) with open(str(tempdir / "data.parquet"), "rb") as f: - result = pq.read_table(pa.PythonFile(f), - use_legacy_dataset=use_legacy_dataset) + result = pq.read_table(pa.PythonFile(f)) assert result.equals(table) -@parametrize_legacy_dataset -def test_byte_stream_split(use_legacy_dataset): +def test_byte_stream_split(): # This is only a smoke test. arr_float = pa.array(list(map(float, range(100)))) arr_int = pa.array(list(map(int, range(100)))) @@ -385,12 +354,10 @@ def test_byte_stream_split(use_legacy_dataset): table = pa.Table.from_arrays([arr_int], names=['tmp']) with pytest.raises(IOError): _check_roundtrip(table, expected=table, use_byte_stream_split=True, - use_dictionary=False, - use_legacy_dataset=use_legacy_dataset) + use_dictionary=False) -@parametrize_legacy_dataset -def test_column_encoding(use_legacy_dataset): +def test_column_encoding(): arr_float = pa.array(list(map(float, range(100)))) arr_int = pa.array(list(map(int, range(100)))) arr_bin = pa.array([str(x) for x in range(100)], type=pa.binary()) @@ -406,30 +373,26 @@ def test_column_encoding(use_legacy_dataset): _check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=False, column_encoding={'a': "BYTE_STREAM_SPLIT", 'b': "PLAIN", - 'c': "PLAIN"}, - use_legacy_dataset=use_legacy_dataset) + 'c': "PLAIN"}) # Check "PLAIN" for all columns. _check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=False, - column_encoding="PLAIN", - use_legacy_dataset=use_legacy_dataset) + column_encoding="PLAIN") # Check "DELTA_BINARY_PACKED" for integer columns. _check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=False, column_encoding={'a': "PLAIN", 'b': "DELTA_BINARY_PACKED", - 'c': "PLAIN"}, - use_legacy_dataset=use_legacy_dataset) + 'c': "PLAIN"}) # Check "DELTA_LENGTH_BYTE_ARRAY" for byte columns. _check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=False, column_encoding={'a': "PLAIN", 'b': "DELTA_BINARY_PACKED", - 'c': "DELTA_LENGTH_BYTE_ARRAY"}, - use_legacy_dataset=use_legacy_dataset) + 'c': "DELTA_LENGTH_BYTE_ARRAY"}) # Check "DELTA_BYTE_ARRAY" for byte columns. 
_check_roundtrip(mixed_table, expected=mixed_table, @@ -437,14 +400,12 @@ def test_column_encoding(use_legacy_dataset): column_encoding={'a': "PLAIN", 'b': "DELTA_BINARY_PACKED", 'c': "DELTA_BYTE_ARRAY", - 'd': "DELTA_BYTE_ARRAY"}, - use_legacy_dataset=use_legacy_dataset) + 'd': "DELTA_BYTE_ARRAY"}) # Check "RLE" for boolean columns. _check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=False, - column_encoding={'e': "RLE"}, - use_legacy_dataset=use_legacy_dataset) + column_encoding={'e': "RLE"}) # Try to pass "BYTE_STREAM_SPLIT" column encoding for integer column 'b'. # This should throw an error as it is only supports FLOAT and DOUBLE. @@ -455,8 +416,7 @@ def test_column_encoding(use_legacy_dataset): use_dictionary=False, column_encoding={'a': "PLAIN", 'b': "BYTE_STREAM_SPLIT", - 'c': "PLAIN"}, - use_legacy_dataset=use_legacy_dataset) + 'c': "PLAIN"}) # Try to pass use "DELTA_BINARY_PACKED" encoding on float column. # This should throw an error as only integers are supported. @@ -465,8 +425,7 @@ def test_column_encoding(use_legacy_dataset): use_dictionary=False, column_encoding={'a': "DELTA_BINARY_PACKED", 'b': "PLAIN", - 'c': "PLAIN"}, - use_legacy_dataset=use_legacy_dataset) + 'c': "PLAIN"}) # Try to pass "RLE_DICTIONARY". # This should throw an error as dictionary encoding is already used by @@ -474,30 +433,26 @@ def test_column_encoding(use_legacy_dataset): with pytest.raises(ValueError): _check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=False, - column_encoding="RLE_DICTIONARY", - use_legacy_dataset=use_legacy_dataset) + column_encoding="RLE_DICTIONARY") # Try to pass unsupported encoding. with pytest.raises(ValueError): _check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=False, - column_encoding={'a': "MADE_UP_ENCODING"}, - use_legacy_dataset=use_legacy_dataset) + column_encoding={'a': "MADE_UP_ENCODING"}) # Try to pass column_encoding and use_dictionary. # This should throw an error. with pytest.raises(ValueError): _check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=['b'], - column_encoding={'b': "PLAIN"}, - use_legacy_dataset=use_legacy_dataset) + column_encoding={'b': "PLAIN"}) # Try to pass column_encoding and use_dictionary=True (default value). # This should throw an error. with pytest.raises(ValueError): _check_roundtrip(mixed_table, expected=mixed_table, - column_encoding={'b': "PLAIN"}, - use_legacy_dataset=use_legacy_dataset) + column_encoding={'b': "PLAIN"}) # Try to pass column_encoding and use_byte_stream_split on same column. # This should throw an error. @@ -507,8 +462,7 @@ def test_column_encoding(use_legacy_dataset): use_byte_stream_split=['a'], column_encoding={'a': "RLE", 'b': "BYTE_STREAM_SPLIT", - 'c': "PLAIN"}, - use_legacy_dataset=use_legacy_dataset) + 'c': "PLAIN"}) # Try to pass column_encoding and use_byte_stream_split=True. # This should throw an error. @@ -518,54 +472,45 @@ def test_column_encoding(use_legacy_dataset): use_byte_stream_split=True, column_encoding={'a': "RLE", 'b': "BYTE_STREAM_SPLIT", - 'c': "PLAIN"}, - use_legacy_dataset=use_legacy_dataset) + 'c': "PLAIN"}) # Try to pass column_encoding=True. # This should throw an error. 
with pytest.raises(TypeError): _check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=False, - column_encoding=True, - use_legacy_dataset=use_legacy_dataset) + column_encoding=True) -@parametrize_legacy_dataset -def test_compression_level(use_legacy_dataset): +def test_compression_level(): arr = pa.array(list(map(int, range(1000)))) data = [arr, arr] table = pa.Table.from_arrays(data, names=['a', 'b']) # Check one compression level. _check_roundtrip(table, expected=table, compression="gzip", - compression_level=1, - use_legacy_dataset=use_legacy_dataset) + compression_level=1) # Check another one to make sure that compression_level=1 does not # coincide with the default one in Arrow. _check_roundtrip(table, expected=table, compression="gzip", - compression_level=5, - use_legacy_dataset=use_legacy_dataset) + compression_level=5) # Check that the user can provide a compression per column _check_roundtrip(table, expected=table, - compression={'a': "gzip", 'b': "snappy"}, - use_legacy_dataset=use_legacy_dataset) + compression={'a': "gzip", 'b': "snappy"}) # Check that the user can provide a compression level per column _check_roundtrip(table, expected=table, compression="gzip", - compression_level={'a': 2, 'b': 3}, - use_legacy_dataset=use_legacy_dataset) + compression_level={'a': 2, 'b': 3}) # Check if both LZ4 compressors are working # (level < 3 -> fast, level >= 3 -> HC) _check_roundtrip(table, expected=table, compression="lz4", - compression_level=1, - use_legacy_dataset=use_legacy_dataset) + compression_level=1) _check_roundtrip(table, expected=table, compression="lz4", - compression_level=9, - use_legacy_dataset=use_legacy_dataset) + compression_level=9) # Check that specifying a compression level for a codec which does allow # specifying one, results into an error. 
@@ -594,8 +539,7 @@ def test_sanitized_spark_field_names(): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_multithreaded_read(use_legacy_dataset): +def test_multithreaded_read(): df = alltypes_sample(size=10000) table = pa.Table.from_pandas(df) @@ -604,19 +548,16 @@ def test_multithreaded_read(use_legacy_dataset): _write_table(table, buf, compression='SNAPPY', version='2.6') buf.seek(0) - table1 = _read_table( - buf, use_threads=True, use_legacy_dataset=use_legacy_dataset) + table1 = _read_table(buf, use_threads=True) buf.seek(0) - table2 = _read_table( - buf, use_threads=False, use_legacy_dataset=use_legacy_dataset) + table2 = _read_table(buf, use_threads=False) assert table1.equals(table2) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_min_chunksize(use_legacy_dataset): +def test_min_chunksize(): data = pd.DataFrame([np.arange(4)], columns=['A', 'B', 'C', 'D']) table = pa.Table.from_pandas(data.reset_index()) @@ -624,7 +565,7 @@ def test_min_chunksize(use_legacy_dataset): _write_table(table, buf, chunk_size=-1) buf.seek(0) - result = _read_table(buf, use_legacy_dataset=use_legacy_dataset) + result = _read_table(buf) assert result.equals(table) @@ -659,57 +600,46 @@ def test_write_error_deletes_incomplete_file(tempdir): assert not filename.exists() -@parametrize_legacy_dataset -def test_read_non_existent_file(tempdir, use_legacy_dataset): +def test_read_non_existent_file(tempdir): path = 'nonexistent-file.parquet' try: - pq.read_table(path, use_legacy_dataset=use_legacy_dataset) + pq.read_table(path) except Exception as e: assert path in e.args[0] -@parametrize_legacy_dataset -def test_read_table_doesnt_warn(datadir, use_legacy_dataset): - if use_legacy_dataset: - msg = "Passing 'use_legacy_dataset=True'" - with pytest.warns(FutureWarning, match=msg): - pq.read_table(datadir / 'v0.7.1.parquet', - use_legacy_dataset=use_legacy_dataset) - else: - with warnings.catch_warnings(): - warnings.simplefilter(action="error") - pq.read_table(datadir / 'v0.7.1.parquet', - use_legacy_dataset=use_legacy_dataset) +def test_read_table_doesnt_warn(datadir): + with warnings.catch_warnings(): + warnings.simplefilter(action="error") + pq.read_table(datadir / 'v0.7.1.parquet') @pytest.mark.pandas -@parametrize_legacy_dataset -def test_zlib_compression_bug(use_legacy_dataset): +def test_zlib_compression_bug(): # ARROW-3514: "zlib deflate failed, output buffer too small" table = pa.Table.from_arrays([pa.array(['abc', 'def'])], ['some_col']) f = io.BytesIO() pq.write_table(table, f, compression='gzip') f.seek(0) - roundtrip = pq.read_table(f, use_legacy_dataset=use_legacy_dataset) + roundtrip = pq.read_table(f) tm.assert_frame_equal(roundtrip.to_pandas(), table.to_pandas()) -@parametrize_legacy_dataset -def test_parquet_file_too_small(tempdir, use_legacy_dataset): +def test_parquet_file_too_small(tempdir): path = str(tempdir / "test.parquet") # TODO(dataset) with datasets API it raises OSError instead with pytest.raises((pa.ArrowInvalid, OSError), match='size is 0 bytes'): with open(path, 'wb') as f: pass - pq.read_table(path, use_legacy_dataset=use_legacy_dataset) + pq.read_table(path) with pytest.raises((pa.ArrowInvalid, OSError), match='size is 4 bytes'): with open(path, 'wb') as f: f.write(b'ffff') - pq.read_table(path, use_legacy_dataset=use_legacy_dataset) + pq.read_table(path) @pytest.mark.pandas @@ -752,17 +682,15 @@ def test_fastparquet_cross_compatibility(tempdir): tm.assert_frame_equal(table_fp.to_pandas(), df) -@parametrize_legacy_dataset 
@pytest.mark.parametrize('array_factory', [ lambda: pa.array([0, None] * 10), lambda: pa.array([0, None] * 10).dictionary_encode(), lambda: pa.array(["", None] * 10), lambda: pa.array(["", None] * 10).dictionary_encode(), ]) -@pytest.mark.parametrize('use_dictionary', [False, True]) @pytest.mark.parametrize('read_dictionary', [False, True]) def test_buffer_contents( - array_factory, use_dictionary, read_dictionary, use_legacy_dataset + array_factory, read_dictionary ): # Test that null values are deterministically initialized to zero # after a roundtrip through Parquet. @@ -773,8 +701,7 @@ def test_buffer_contents( bio.seek(0) read_dictionary = ['col'] if read_dictionary else None table = pq.read_table(bio, use_threads=False, - read_dictionary=read_dictionary, - use_legacy_dataset=use_legacy_dataset) + read_dictionary=read_dictionary) for col in table.columns: [chunk] = col.chunks @@ -826,7 +753,6 @@ def test_reads_over_batch(tempdir): assert table == table2 -@pytest.mark.dataset def test_permutation_of_column_order(tempdir): # ARROW-2366 case = tempdir / "dataset_column_order_permutation" @@ -846,18 +772,6 @@ def test_permutation_of_column_order(tempdir): assert table == table2 -def test_read_table_legacy_deprecated(tempdir): - # ARROW-15870 - table = pa.table({'a': [1, 2, 3]}) - path = tempdir / 'data.parquet' - pq.write_table(table, path) - - with pytest.warns( - FutureWarning, match="Passing 'use_legacy_dataset=True'" - ): - pq.read_table(path, use_legacy_dataset=True) - - def test_thrift_size_limits(tempdir): path = tempdir / 'largethrift.parquet' @@ -942,28 +856,9 @@ def test_page_checksum_verification_write_table(tempdir): with pytest.raises(OSError, match="CRC checksum verification"): _ = corrupted_pq_file.read() - # Case 5: Check that enabling page checksum verification in combination - # with legacy dataset raises an exception - with pytest.raises(ValueError, match="page_checksum_verification"): - _ = pq.read_table(corrupted_path, - page_checksum_verification=True, - use_legacy_dataset=True) - @pytest.mark.dataset -@pytest.mark.parametrize( - "use_legacy_dataset", - [ - False, - pytest.param( - True, - marks=pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning" - ), - ), - ], -) -def test_checksum_write_to_dataset(tempdir, use_legacy_dataset): +def test_checksum_write_to_dataset(tempdir): """Check that checksum verification works for datasets created with pq.write_to_dataset""" @@ -973,8 +868,7 @@ def test_checksum_write_to_dataset(tempdir, use_legacy_dataset): original_dir_path = tempdir / 'correct_dir' pq.write_to_dataset(table_orig, original_dir_path, - write_page_checksum=True, - use_legacy_dataset=use_legacy_dataset) + write_page_checksum=True) # Read file and verify that the data is correct original_file_path_list = list(original_dir_path.iterdir()) @@ -1014,3 +908,23 @@ def test_checksum_write_to_dataset(tempdir, use_legacy_dataset): # checksum verification enabled raises an exception with pytest.raises(OSError, match="CRC checksum verification"): _ = pq.read_table(corrupted_file_path, page_checksum_verification=True) + + +@pytest.mark.dataset +def test_deprecated_use_legacy_dataset(tempdir): + # Test that specifying use_legacy_dataset in ParquetDataset, write_to_dataset + # and read_table doesn't raise an error but gives a warning. 
+ table = pa.table({"a": [1, 2, 3]}) + path = tempdir / "deprecate_legacy" + + msg = "Passing 'use_legacy_dataset'" + with pytest.warns(FutureWarning, match=msg): + pq.write_to_dataset(table, path, use_legacy_dataset=False) + + pq.write_to_dataset(table, path) + + with pytest.warns(FutureWarning, match=msg): + pq.read_table(path, use_legacy_dataset=False) + + with pytest.warns(FutureWarning, match=msg): + pq.ParquetDataset(path, use_legacy_dataset=False) diff --git a/python/pyarrow/tests/parquet/test_compliant_nested_type.py b/python/pyarrow/tests/parquet/test_compliant_nested_type.py index ca1ad7ee32255..2345855a3321b 100644 --- a/python/pyarrow/tests/parquet/test_compliant_nested_type.py +++ b/python/pyarrow/tests/parquet/test_compliant_nested_type.py @@ -18,7 +18,6 @@ import pytest import pyarrow as pa -from pyarrow.tests.parquet.common import parametrize_legacy_dataset try: import pyarrow.parquet as pq @@ -58,16 +57,13 @@ @pytest.mark.pandas -@parametrize_legacy_dataset @parametrize_test_data -def test_write_compliant_nested_type_enable(tempdir, - use_legacy_dataset, test_data): +def test_write_compliant_nested_type_enable(tempdir, test_data): # prepare dataframe for testing df = pd.DataFrame(data=test_data) # verify that we can read/write pandas df with new flag (default behaviour) _roundtrip_pandas_dataframe(df, - write_kwargs={}, - use_legacy_dataset=use_legacy_dataset) + write_kwargs={}) # Write to a parquet file with compliant nested type table = pa.Table.from_pandas(df, preserve_index=False) @@ -83,21 +79,17 @@ def test_write_compliant_nested_type_enable(tempdir, assert new_table.schema.types[0].value_field.name == 'element' # Verify that the new table can be read/written correctly - _check_roundtrip(new_table, - use_legacy_dataset=use_legacy_dataset) + _check_roundtrip(new_table) @pytest.mark.pandas -@parametrize_legacy_dataset @parametrize_test_data -def test_write_compliant_nested_type_disable(tempdir, - use_legacy_dataset, test_data): +def test_write_compliant_nested_type_disable(tempdir, test_data): # prepare dataframe for testing df = pd.DataFrame(data=test_data) # verify that we can read/write with new flag disabled _roundtrip_pandas_dataframe(df, write_kwargs={ - 'use_compliant_nested_type': False}, - use_legacy_dataset=use_legacy_dataset) + 'use_compliant_nested_type': False}) # Write to a parquet file while disabling compliant nested type table = pa.Table.from_pandas(df, preserve_index=False) @@ -114,5 +106,4 @@ def test_write_compliant_nested_type_disable(tempdir, # Verify that the new table can be read/written correctly _check_roundtrip(new_table, - use_legacy_dataset=use_legacy_dataset, use_compliant_nested_type=False) diff --git a/python/pyarrow/tests/parquet/test_data_types.py b/python/pyarrow/tests/parquet/test_data_types.py index 32fe128bbae9b..e6b66b00428fb 100644 --- a/python/pyarrow/tests/parquet/test_data_types.py +++ b/python/pyarrow/tests/parquet/test_data_types.py @@ -23,8 +23,7 @@ import pyarrow as pa from pyarrow.tests import util -from pyarrow.tests.parquet.common import (_check_roundtrip, - parametrize_legacy_dataset) +from pyarrow.tests.parquet.common import _check_roundtrip try: import pyarrow.parquet as pq @@ -54,9 +53,8 @@ @pytest.mark.pandas -@parametrize_legacy_dataset @pytest.mark.parametrize('chunk_size', [None, 1000]) -def test_parquet_2_0_roundtrip(tempdir, chunk_size, use_legacy_dataset): +def test_parquet_2_0_roundtrip(tempdir, chunk_size): df = alltypes_sample(size=10000, categorical=True) filename = tempdir / 'pandas_roundtrip.parquet' @@ 
-65,8 +63,7 @@ def test_parquet_2_0_roundtrip(tempdir, chunk_size, use_legacy_dataset): _write_table(arrow_table, filename, version='2.6', chunk_size=chunk_size) - table_read = pq.read_pandas( - filename, use_legacy_dataset=use_legacy_dataset) + table_read = pq.read_pandas(filename) assert table_read.schema.pandas_metadata is not None read_metadata = table_read.schema.metadata @@ -77,8 +74,7 @@ def test_parquet_2_0_roundtrip(tempdir, chunk_size, use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_parquet_1_0_roundtrip(tempdir, use_legacy_dataset): +def test_parquet_1_0_roundtrip(tempdir): size = 10000 np.random.seed(0) df = pd.DataFrame({ @@ -100,7 +96,7 @@ def test_parquet_1_0_roundtrip(tempdir, use_legacy_dataset): filename = tempdir / 'pandas_roundtrip.parquet' arrow_table = pa.Table.from_pandas(df) _write_table(arrow_table, filename, version='1.0') - table_read = _read_table(filename, use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(filename) df_read = table_read.to_pandas() # We pass uint32_t as int64_t if we write Parquet version 1.0 @@ -113,18 +109,17 @@ def test_parquet_1_0_roundtrip(tempdir, use_legacy_dataset): # ----------------------------------------------------------------------------- -def _simple_table_write_read(table, use_legacy_dataset): +def _simple_table_write_read(table): bio = pa.BufferOutputStream() pq.write_table(table, bio) contents = bio.getvalue() return pq.read_table( - pa.BufferReader(contents), use_legacy_dataset=use_legacy_dataset + pa.BufferReader(contents) ) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_direct_read_dictionary(use_legacy_dataset): +def test_direct_read_dictionary(): # ARROW-3325 repeats = 10 nunique = 5 @@ -140,8 +135,7 @@ def test_direct_read_dictionary(use_legacy_dataset): contents = bio.getvalue() result = pq.read_table(pa.BufferReader(contents), - read_dictionary=['f0'], - use_legacy_dataset=use_legacy_dataset) + read_dictionary=['f0']) # Compute dictionary-encoded subfield expected = pa.table([table[0].dictionary_encode()], names=['f0']) @@ -149,8 +143,7 @@ def test_direct_read_dictionary(use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_direct_read_dictionary_subfield(use_legacy_dataset): +def test_direct_read_dictionary_subfield(): repeats = 10 nunique = 5 @@ -163,8 +156,7 @@ def test_direct_read_dictionary_subfield(use_legacy_dataset): pq.write_table(table, bio) contents = bio.getvalue() result = pq.read_table(pa.BufferReader(contents), - read_dictionary=['f0.list.element'], - use_legacy_dataset=use_legacy_dataset) + read_dictionary=['f0.list.element']) arr = pa.array(data[0]) values_as_dict = arr.values.dictionary_encode() @@ -181,8 +173,7 @@ def test_direct_read_dictionary_subfield(use_legacy_dataset): assert result[0].num_chunks == 1 -@parametrize_legacy_dataset -def test_dictionary_array_automatically_read(use_legacy_dataset): +def test_dictionary_array_automatically_read(): # ARROW-3246 # Make a large dictionary, a little over 4MB of data @@ -200,7 +191,7 @@ def test_dictionary_array_automatically_read(use_legacy_dataset): dict_values)) table = pa.table([pa.chunked_array(chunks)], names=['f0']) - result = _simple_table_write_read(table, use_legacy_dataset) + result = _simple_table_write_read(table) assert result.equals(table) @@ -213,8 +204,7 @@ def test_dictionary_array_automatically_read(use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_decimal_roundtrip(tempdir, use_legacy_dataset): +def 
test_decimal_roundtrip(tempdir):
     num_values = 10
 
     columns = {}
@@ -234,8 +224,7 @@ def test_decimal_roundtrip(tempdir, use_legacy_dataset):
     string_filename = str(filename)
     table = pa.Table.from_pandas(expected)
     _write_table(table, string_filename)
-    result_table = _read_table(
-        string_filename, use_legacy_dataset=use_legacy_dataset)
+    result_table = _read_table(string_filename)
     result = result_table.to_pandas()
     tm.assert_frame_equal(result, expected)
 
@@ -259,14 +248,13 @@ def test_decimal_roundtrip_negative_scale(tempdir):
 # -----------------------------------------------------------------------------
 
 
-@parametrize_legacy_dataset
 @pytest.mark.parametrize('dtype', [int, float])
-def test_single_pylist_column_roundtrip(tempdir, dtype, use_legacy_dataset):
+def test_single_pylist_column_roundtrip(tempdir, dtype):
     filename = tempdir / 'single_{}_column.parquet'.format(dtype.__name__)
     data = [pa.array(list(map(dtype, range(5))))]
     table = pa.Table.from_arrays(data, names=['a'])
     _write_table(table, filename)
-    table_read = _read_table(filename, use_legacy_dataset=use_legacy_dataset)
+    table_read = _read_table(filename)
     for i in range(table.num_columns):
         col_written = table[i]
         col_read = table_read[i]
@@ -277,16 +265,14 @@ def test_single_pylist_column_roundtrip(tempdir, dtype, use_legacy_dataset):
         assert data_written.equals(data_read)
 
 
-@parametrize_legacy_dataset
-def test_empty_lists_table_roundtrip(use_legacy_dataset):
+def test_empty_lists_table_roundtrip():
     # ARROW-2744: Shouldn't crash when writing an array of empty lists
     arr = pa.array([[], []], type=pa.list_(pa.int32()))
     table = pa.Table.from_arrays([arr], ["A"])
-    _check_roundtrip(table, use_legacy_dataset=use_legacy_dataset)
+    _check_roundtrip(table)
 
 
-@parametrize_legacy_dataset
-def test_nested_list_nonnullable_roundtrip_bug(use_legacy_dataset):
+def test_nested_list_nonnullable_roundtrip_bug():
     # Reproduce failure in ARROW-5630
     typ = pa.list_(pa.field("item", pa.float32(), False))
     num_rows = 10000
@@ -295,26 +281,22 @@ def test_nested_list_nonnullable_roundtrip_bug(use_legacy_dataset):
                                  (num_rows // 10)), type=typ)
     ], ['a'])
     _check_roundtrip(
-        t, data_page_size=4096, use_legacy_dataset=use_legacy_dataset)
+        t, data_page_size=4096)
 
 
-@parametrize_legacy_dataset
-def test_nested_list_struct_multiple_batches_roundtrip(
-    tempdir, use_legacy_dataset
-):
+def test_nested_list_struct_multiple_batches_roundtrip(tempdir):
     # Reproduce failure in ARROW-11024
     data = [[{'x': 'abc', 'y': 'abc'}]]*100 + [[{'x': 'abc', 'y': 'gcb'}]]*100
     table = pa.table([pa.array(data)], names=['column'])
     _check_roundtrip(
-        table, row_group_size=20, use_legacy_dataset=use_legacy_dataset)
+        table, row_group_size=20)
 
     # Reproduce failure in ARROW-11069 (plain non-nested structs with strings)
     data = pa.array(
         [{'a': '1', 'b': '2'}, {'a': '3', 'b': '4'}, {'a': '5', 'b': '6'}]*10
     )
     table = pa.table({'column': data})
-    _check_roundtrip(
-        table, row_group_size=10, use_legacy_dataset=use_legacy_dataset)
+    _check_roundtrip(table, row_group_size=10)
 
 
 def test_writing_empty_lists():
@@ -366,8 +348,7 @@ def test_large_list_records():
 
 
 @pytest.mark.pandas
-@parametrize_legacy_dataset
-def test_parquet_nested_convenience(tempdir, use_legacy_dataset):
+def test_parquet_nested_convenience(tempdir):
     # ARROW-1684
     df = pd.DataFrame({
         'a': [[1, 2, 3], None, [4, 5], []],
@@ -380,11 +361,11 @@ def test_parquet_nested_convenience(tempdir, use_legacy_dataset):
     _write_table(table, path)
 
     read = pq.read_table(
-        path, columns=['a'], use_legacy_dataset=use_legacy_dataset)
+        path, columns=['a'])
tm.assert_frame_equal(read.to_pandas(), df[['a']]) read = pq.read_table( - path, columns=['a', 'b'], use_legacy_dataset=use_legacy_dataset) + path, columns=['a', 'b']) tm.assert_frame_equal(read.to_pandas(), df) @@ -420,17 +401,16 @@ def test_large_table_int32_overflow(): _write_table(table, f) -def _simple_table_roundtrip(table, use_legacy_dataset=False, **write_kwargs): +def _simple_table_roundtrip(table, **write_kwargs): stream = pa.BufferOutputStream() _write_table(table, stream, **write_kwargs) buf = stream.getvalue() - return _read_table(buf, use_legacy_dataset=use_legacy_dataset) + return _read_table(buf) @pytest.mark.slow @pytest.mark.large_memory -@parametrize_legacy_dataset -def test_byte_array_exactly_2gb(use_legacy_dataset): +def test_byte_array_exactly_2gb(): # Test edge case reported in ARROW-3762 val = b'x' * (1 << 10) @@ -444,15 +424,14 @@ def test_byte_array_exactly_2gb(use_legacy_dataset): values = pa.chunked_array([base, pa.array(case)]) t = pa.table([values], names=['f0']) result = _simple_table_roundtrip( - t, use_legacy_dataset=use_legacy_dataset, use_dictionary=False) + t, use_dictionary=False) assert t.equals(result) @pytest.mark.slow @pytest.mark.pandas @pytest.mark.large_memory -@parametrize_legacy_dataset -def test_binary_array_overflow_to_chunked(use_legacy_dataset): +def test_binary_array_overflow_to_chunked(): # ARROW-3762 # 2^31 + 1 bytes @@ -462,8 +441,7 @@ def test_binary_array_overflow_to_chunked(use_legacy_dataset): df = pd.DataFrame({'byte_col': values}) tbl = pa.Table.from_pandas(df, preserve_index=False) - read_tbl = _simple_table_roundtrip( - tbl, use_legacy_dataset=use_legacy_dataset) + read_tbl = _simple_table_roundtrip(tbl) col0_data = read_tbl[0] assert isinstance(col0_data, pa.ChunkedArray) @@ -477,8 +455,7 @@ def test_binary_array_overflow_to_chunked(use_legacy_dataset): @pytest.mark.slow @pytest.mark.pandas @pytest.mark.large_memory -@parametrize_legacy_dataset -def test_list_of_binary_large_cell(use_legacy_dataset): +def test_list_of_binary_large_cell(): # ARROW-4688 data = [] @@ -491,8 +468,7 @@ def test_list_of_binary_large_cell(use_legacy_dataset): arr = pa.array(data) table = pa.Table.from_arrays([arr], ['chunky_cells']) - read_table = _simple_table_roundtrip( - table, use_legacy_dataset=use_legacy_dataset) + read_table = _simple_table_roundtrip(table) assert table.equals(read_table) diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py index a9e99d5d65cf9..b6e351bdef9a7 100644 --- a/python/pyarrow/tests/parquet/test_dataset.py +++ b/python/pyarrow/tests/parquet/test_dataset.py @@ -29,9 +29,6 @@ from pyarrow import fs from pyarrow.filesystem import LocalFileSystem from pyarrow.tests import util -from pyarrow.tests.parquet.common import ( - parametrize_legacy_dataset, parametrize_legacy_dataset_fixed, - parametrize_legacy_dataset_not_supported) from pyarrow.util import guid from pyarrow.vendored.version import Version @@ -53,76 +50,10 @@ # Marks all of the tests in this module # Ignore these with pytest ... 
-m 'not parquet' -pytestmark = pytest.mark.parquet +pytestmark = [pytest.mark.parquet, pytest.mark.dataset] -@pytest.mark.pandas -def test_parquet_piece_read(tempdir): - df = _test_dataframe(1000) - table = pa.Table.from_pandas(df) - - path = tempdir / 'parquet_piece_read.parquet' - _write_table(table, path, version='2.6') - - with pytest.warns(FutureWarning): - piece1 = pq.ParquetDatasetPiece(path) - - result = piece1.read() - assert result.equals(table) - - -@pytest.mark.pandas -def test_parquet_piece_open_and_get_metadata(tempdir): - df = _test_dataframe(100) - table = pa.Table.from_pandas(df) - - path = tempdir / 'parquet_piece_read.parquet' - _write_table(table, path, version='2.6') - - with pytest.warns(FutureWarning): - piece = pq.ParquetDatasetPiece(path) - - table1 = piece.read() - assert isinstance(table1, pa.Table) - meta1 = piece.get_metadata() - assert isinstance(meta1, pq.FileMetaData) - - assert table.equals(table1) - - -@pytest.mark.filterwarnings("ignore:ParquetDatasetPiece:FutureWarning") -def test_parquet_piece_basics(): - path = '/baz.parq' - - piece1 = pq.ParquetDatasetPiece(path) - piece2 = pq.ParquetDatasetPiece(path, row_group=1) - piece3 = pq.ParquetDatasetPiece( - path, row_group=1, partition_keys=[('foo', 0), ('bar', 1)]) - - assert str(piece1) == path - assert str(piece2) == '/baz.parq | row_group=1' - assert str(piece3) == 'partition[foo=0, bar=1] /baz.parq | row_group=1' - - assert piece1 == piece1 - assert piece2 == piece2 - assert piece3 == piece3 - assert piece1 != piece3 - - -def test_partition_set_dictionary_type(): - set1 = pq.PartitionSet('key1', ['foo', 'bar', 'baz']) - set2 = pq.PartitionSet('key2', [2007, 2008, 2009]) - - assert isinstance(set1.dictionary, pa.StringArray) - assert isinstance(set2.dictionary, pa.IntegerArray) - - set3 = pq.PartitionSet('key2', [datetime.datetime(2007, 1, 1)]) - with pytest.raises(TypeError): - set3.dictionary - - -@parametrize_legacy_dataset_fixed -def test_filesystem_uri(tempdir, use_legacy_dataset): +def test_filesystem_uri(tempdir): table = pa.table({"a": [1, 2, 3]}) directory = tempdir / "data_dir" @@ -132,72 +63,36 @@ def test_filesystem_uri(tempdir, use_legacy_dataset): # filesystem object result = pq.read_table( - path, filesystem=fs.LocalFileSystem(), - use_legacy_dataset=use_legacy_dataset) + path, filesystem=fs.LocalFileSystem()) assert result.equals(table) # filesystem URI result = pq.read_table( - "data_dir/data.parquet", filesystem=util._filesystem_uri(tempdir), - use_legacy_dataset=use_legacy_dataset) + "data_dir/data.parquet", filesystem=util._filesystem_uri(tempdir)) assert result.equals(table) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_read_partitioned_directory(tempdir, use_legacy_dataset): +def test_read_partitioned_directory(tempdir): fs = LocalFileSystem._get_instance() - _partition_test_for_filesystem(fs, tempdir, use_legacy_dataset) + _partition_test_for_filesystem(fs, tempdir) -@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning") @pytest.mark.pandas -def test_create_parquet_dataset_multi_threaded(tempdir): - fs = LocalFileSystem._get_instance() - base_path = tempdir - - _partition_test_for_filesystem(fs, base_path) - - manifest = pq.ParquetManifest(base_path, filesystem=fs, - metadata_nthreads=1) - with pytest.warns( - FutureWarning, match="Specifying the 'metadata_nthreads'" - ): - dataset = pq.ParquetDataset( - base_path, filesystem=fs, metadata_nthreads=16, - use_legacy_dataset=True - ) - assert len(dataset.pieces) > 0 - partitions = dataset.partitions - assert 
len(partitions.partition_names) > 0 - assert partitions.partition_names == manifest.partitions.partition_names - assert len(partitions.levels) == len(manifest.partitions.levels) - - -@pytest.mark.pandas -@parametrize_legacy_dataset -def test_read_partitioned_columns_selection(tempdir, use_legacy_dataset): +def test_read_partitioned_columns_selection(tempdir): # ARROW-3861 - do not include partition columns in resulting table when # `columns` keyword was passed without those columns fs = LocalFileSystem._get_instance() base_path = tempdir _partition_test_for_filesystem(fs, base_path) - dataset = pq.ParquetDataset( - base_path, use_legacy_dataset=use_legacy_dataset) + dataset = pq.ParquetDataset(base_path) result = dataset.read(columns=["values"]) - if use_legacy_dataset: - # ParquetDataset implementation always includes the partition columns - # automatically, and we can't easily "fix" this since dask relies on - # this behaviour (ARROW-8644) - assert result.column_names == ["values", "foo", "bar"] - else: - assert result.column_names == ["values"] + assert result.column_names == ["values"] @pytest.mark.pandas -@parametrize_legacy_dataset -def test_filters_equivalency(tempdir, use_legacy_dataset): +def test_filters_equivalency(tempdir): fs = LocalFileSystem._get_instance() base_path = tempdir @@ -225,7 +120,6 @@ def test_filters_equivalency(tempdir, use_legacy_dataset): base_path, filesystem=fs, filters=[('integer', '=', 1), ('string', '!=', 'b'), ('boolean', '==', 'True')], - use_legacy_dataset=use_legacy_dataset, ) table = dataset.read() result_df = (table.to_pandas().reset_index(drop=True)) @@ -247,8 +141,7 @@ def test_filters_equivalency(tempdir, use_legacy_dataset): [('integer', '=', 0), ('boolean', '==', 'False')] ] dataset = pq.ParquetDataset( - base_path, filesystem=fs, filters=filters, - use_legacy_dataset=use_legacy_dataset) + base_path, filesystem=fs, filters=filters) table = dataset.read() result_df = table.to_pandas().reset_index(drop=True) @@ -262,30 +155,15 @@ def test_filters_equivalency(tempdir, use_legacy_dataset): assert df_filter_2.sum() > 0 assert result_df.shape[0] == (df_filter_1.sum() + df_filter_2.sum()) - if use_legacy_dataset: - # Check for \0 in predicate values. Until they are correctly - # implemented in ARROW-3391, they would otherwise lead to weird - # results with the current code. 
- with pytest.raises(NotImplementedError): - filters = [[('string', '==', b'1\0a')]] - pq.ParquetDataset(base_path, filesystem=fs, filters=filters, - use_legacy_dataset=True) - with pytest.raises(NotImplementedError): - filters = [[('string', '==', '1\0a')]] - pq.ParquetDataset(base_path, filesystem=fs, filters=filters, - use_legacy_dataset=True) - else: - for filters in [[[('string', '==', b'1\0a')]], - [[('string', '==', '1\0a')]]]: - dataset = pq.ParquetDataset( - base_path, filesystem=fs, filters=filters, - use_legacy_dataset=False) - assert dataset.read().num_rows == 0 + for filters in [[[('string', '==', b'1\0a')]], + [[('string', '==', '1\0a')]]]: + dataset = pq.ParquetDataset( + base_path, filesystem=fs, filters=filters) + assert dataset.read().num_rows == 0 @pytest.mark.pandas -@parametrize_legacy_dataset -def test_filters_cutoff_exclusive_integer(tempdir, use_legacy_dataset): +def test_filters_cutoff_exclusive_integer(tempdir): fs = LocalFileSystem._get_instance() base_path = tempdir @@ -308,7 +186,6 @@ def test_filters_cutoff_exclusive_integer(tempdir, use_legacy_dataset): ('integers', '<', 4), ('integers', '>', 1), ], - use_legacy_dataset=use_legacy_dataset ) table = dataset.read() result_df = (table.to_pandas() @@ -319,15 +196,14 @@ def test_filters_cutoff_exclusive_integer(tempdir, use_legacy_dataset): assert result_list == [2, 3] -@pytest.mark.pandas -@parametrize_legacy_dataset @pytest.mark.xfail( # different error with use_legacy_datasets because result_df is no longer # categorical raises=(TypeError, AssertionError), reason='Loss of type information in creation of categoricals.' ) -def test_filters_cutoff_exclusive_datetime(tempdir, use_legacy_dataset): +@pytest.mark.pandas +def test_filters_cutoff_exclusive_datetime(tempdir): fs = LocalFileSystem._get_instance() base_path = tempdir @@ -356,7 +232,6 @@ def test_filters_cutoff_exclusive_datetime(tempdir, use_legacy_dataset): ('dates', '<', "2018-04-12"), ('dates', '>', "2018-04-10") ], - use_legacy_dataset=use_legacy_dataset ) table = dataset.read() result_df = (table.to_pandas() @@ -371,7 +246,6 @@ def test_filters_cutoff_exclusive_datetime(tempdir, use_legacy_dataset): @pytest.mark.pandas -@pytest.mark.dataset def test_filters_inclusive_datetime(tempdir): # ARROW-11480 path = tempdir / 'timestamps.parquet' @@ -389,8 +263,7 @@ def test_filters_inclusive_datetime(tempdir): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_filters_inclusive_integer(tempdir, use_legacy_dataset): +def test_filters_inclusive_integer(tempdir): fs = LocalFileSystem._get_instance() base_path = tempdir @@ -413,7 +286,6 @@ def test_filters_inclusive_integer(tempdir, use_legacy_dataset): ('integers', '<=', 3), ('integers', '>=', 2), ], - use_legacy_dataset=use_legacy_dataset ) table = dataset.read() result_df = (table.to_pandas() @@ -425,8 +297,7 @@ def test_filters_inclusive_integer(tempdir, use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_filters_inclusive_set(tempdir, use_legacy_dataset): +def test_filters_inclusive_set(tempdir): fs = LocalFileSystem._get_instance() base_path = tempdir @@ -451,7 +322,6 @@ def test_filters_inclusive_set(tempdir, use_legacy_dataset): dataset = pq.ParquetDataset( base_path, filesystem=fs, filters=[('string', 'in', 'ab')], - use_legacy_dataset=use_legacy_dataset ) table = dataset.read() result_df = (table.to_pandas().reset_index(drop=True)) @@ -464,7 +334,6 @@ def test_filters_inclusive_set(tempdir, use_legacy_dataset): base_path, filesystem=fs, filters=[('integer', 'in', [1]), 
('string', 'in', ('a', 'b')), ('boolean', 'not in', {'False'})], - use_legacy_dataset=use_legacy_dataset ) table = dataset.read() result_df = (table.to_pandas().reset_index(drop=True)) @@ -475,8 +344,7 @@ def test_filters_inclusive_set(tempdir, use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_filters_invalid_pred_op(tempdir, use_legacy_dataset): +def test_filters_invalid_pred_op(tempdir): fs = LocalFileSystem._get_instance() base_path = tempdir @@ -496,49 +364,30 @@ def test_filters_invalid_pred_op(tempdir, use_legacy_dataset): with pytest.raises(TypeError): pq.ParquetDataset(base_path, filesystem=fs, - filters=[('integers', 'in', 3), ], - use_legacy_dataset=use_legacy_dataset) + filters=[('integers', 'in', 3), ]) with pytest.raises(ValueError): pq.ParquetDataset(base_path, filesystem=fs, - filters=[('integers', '=<', 3), ], - use_legacy_dataset=use_legacy_dataset) - - if use_legacy_dataset: - with pytest.raises(ValueError): - pq.ParquetDataset(base_path, - filesystem=fs, - filters=[('integers', 'in', set()), ], - use_legacy_dataset=use_legacy_dataset) - else: - # Dataset API returns empty table instead - dataset = pq.ParquetDataset(base_path, - filesystem=fs, - filters=[('integers', 'in', set()), ], - use_legacy_dataset=use_legacy_dataset) - assert dataset.read().num_rows == 0 + filters=[('integers', '=<', 3), ]) - if use_legacy_dataset: - with pytest.raises(ValueError): - pq.ParquetDataset(base_path, - filesystem=fs, - filters=[('integers', '!=', {3})], - use_legacy_dataset=use_legacy_dataset) - else: - dataset = pq.ParquetDataset(base_path, - filesystem=fs, - filters=[('integers', '!=', {3})], - use_legacy_dataset=use_legacy_dataset) - with pytest.raises(NotImplementedError): - assert dataset.read().num_rows == 0 + # Dataset API returns empty table + dataset = pq.ParquetDataset(base_path, + filesystem=fs, + filters=[('integers', 'in', set()), ]) + assert dataset.read().num_rows == 0 + + dataset = pq.ParquetDataset(base_path, + filesystem=fs, + filters=[('integers', '!=', {3})]) + with pytest.raises(NotImplementedError): + assert dataset.read().num_rows == 0 @pytest.mark.pandas -@parametrize_legacy_dataset_fixed -def test_filters_invalid_column(tempdir, use_legacy_dataset): +def test_filters_invalid_column(tempdir): # ARROW-5572 - raise error on invalid name in filter specification - # works with new dataset / xfail with legacy implementation + # works with new dataset fs = LocalFileSystem._get_instance() base_path = tempdir @@ -556,12 +405,10 @@ def test_filters_invalid_column(tempdir, use_legacy_dataset): msg = r"No match for FieldRef.Name\(non_existent_column\)" with pytest.raises(ValueError, match=msg): pq.ParquetDataset(base_path, filesystem=fs, - filters=[('non_existent_column', '<', 3), ], - use_legacy_dataset=use_legacy_dataset).read() + filters=[('non_existent_column', '<', 3), ]).read() @pytest.mark.pandas -@parametrize_legacy_dataset @pytest.mark.parametrize("filters", ([('integers', '<', 3)], [[('integers', '<', 3)]], @@ -569,7 +416,7 @@ def test_filters_invalid_column(tempdir, use_legacy_dataset): pc.field('nested', 'a') < 3, pc.field('nested', 'b').cast(pa.int64()) < 3)) @pytest.mark.parametrize("read_method", ("read_table", "read_pandas")) -def test_filters_read_table(tempdir, use_legacy_dataset, filters, read_method): +def test_filters_read_table(tempdir, filters, read_method): read = getattr(pq, read_method) # test that filters keyword is passed through in read_table fs = LocalFileSystem._get_instance() @@ -589,24 +436,15 @@ def 
test_filters_read_table(tempdir, use_legacy_dataset, filters, read_method):
     _generate_partition_directories(fs, base_path, partition_spec, df)
 
-    kwargs = dict(filesystem=fs, filters=filters,
-                  use_legacy_dataset=use_legacy_dataset)
+    kwargs = dict(filesystem=fs, filters=filters)
 
-    # Using Expression in legacy dataset not supported
-    if use_legacy_dataset and isinstance(filters, pc.Expression):
-        msg = "Expressions as filter not supported for legacy dataset"
-        with pytest.raises(TypeError, match=msg):
-            read(base_path, **kwargs)
-    else:
-        table = read(base_path, **kwargs)
-        assert table.num_rows == 3
+    table = read(base_path, **kwargs)
+    assert table.num_rows == 3
 
 
 @pytest.mark.pandas
-@parametrize_legacy_dataset_fixed
-def test_partition_keys_with_underscores(tempdir, use_legacy_dataset):
+def test_partition_keys_with_underscores(tempdir):
     # ARROW-5666 - partition field values with underscores preserve underscores
-    # xfail with legacy dataset -> they get interpreted as integers
     fs = LocalFileSystem._get_instance()
     base_path = tempdir
 
@@ -623,60 +461,47 @@ def test_partition_keys_with_underscores(tempdir, use_legacy_dataset):
 
     _generate_partition_directories(fs, base_path, partition_spec, df)
 
-    dataset = pq.ParquetDataset(
-        base_path, use_legacy_dataset=use_legacy_dataset)
+    dataset = pq.ParquetDataset(base_path)
     result = dataset.read()
     assert result.column("year_week").to_pylist() == string_keys
 
 
 @pytest.mark.s3
-@parametrize_legacy_dataset
-def test_read_s3fs(s3_example_s3fs, use_legacy_dataset):
+def test_read_s3fs(s3_example_s3fs):
     fs, path = s3_example_s3fs
     path = path + "/test.parquet"
 
     table = pa.table({"a": [1, 2, 3]})
     _write_table(table, path, filesystem=fs)
 
-    result = _read_table(
-        path, filesystem=fs, use_legacy_dataset=use_legacy_dataset
-    )
+    result = _read_table(path, filesystem=fs)
     assert result.equals(table)
 
 
 @pytest.mark.s3
-@parametrize_legacy_dataset
-def test_read_directory_s3fs(s3_example_s3fs, use_legacy_dataset):
+def test_read_directory_s3fs(s3_example_s3fs):
     fs, directory = s3_example_s3fs
     path = directory + "/test.parquet"
 
     table = pa.table({"a": [1, 2, 3]})
     _write_table(table, path, filesystem=fs)
 
-    result = _read_table(
-        directory, filesystem=fs, use_legacy_dataset=use_legacy_dataset
-    )
+    result = _read_table(directory, filesystem=fs)
    assert result.equals(table)
 
 
 @pytest.mark.pandas
-@parametrize_legacy_dataset
-def test_read_single_file_list(tempdir, use_legacy_dataset):
+def test_read_single_file_list(tempdir):
     data_path = str(tempdir / 'data.parquet')
 
     table = pa.table({"a": [1, 2, 3]})
     _write_table(table, data_path)
 
-    result = pq.ParquetDataset(
-        [data_path], use_legacy_dataset=use_legacy_dataset
-    ).read()
+    result = pq.ParquetDataset([data_path]).read()
     assert result.equals(table)
 
 
 @pytest.mark.pandas
 @pytest.mark.s3
-@parametrize_legacy_dataset
-def test_read_partitioned_directory_s3fs_wrapper(
-    s3_example_s3fs, use_legacy_dataset
-):
+def test_read_partitioned_directory_s3fs_wrapper(s3_example_s3fs):
     import s3fs
 
     from pyarrow.filesystem import S3FSWrapper
@@ -690,23 +515,18 @@ def test_read_partitioned_directory_s3fs_wrapper(
     _partition_test_for_filesystem(wrapper, path)
 
     # Check that we can auto-wrap
-    dataset = pq.ParquetDataset(
-        path, filesystem=fs, use_legacy_dataset=use_legacy_dataset
-    )
+    dataset = pq.ParquetDataset(path, filesystem=fs)
     dataset.read()
 
 
 @pytest.mark.pandas
 @pytest.mark.s3
-@parametrize_legacy_dataset
-def 
test_read_partitioned_directory_s3fs(s3_example_s3fs): fs, path = s3_example_s3fs - _partition_test_for_filesystem( - fs, path, use_legacy_dataset=use_legacy_dataset - ) + _partition_test_for_filesystem(fs, path) -def _partition_test_for_filesystem(fs, base_path, use_legacy_dataset=True): +def _partition_test_for_filesystem(fs, base_path): foo_keys = [0, 1] bar_keys = ['a', 'b', 'c'] partition_spec = [ @@ -724,8 +544,7 @@ def _partition_test_for_filesystem(fs, base_path, use_legacy_dataset=True): _generate_partition_directories(fs, base_path, partition_spec, df) - dataset = pq.ParquetDataset( - base_path, filesystem=fs, use_legacy_dataset=use_legacy_dataset) + dataset = pq.ParquetDataset(base_path, filesystem=fs) table = dataset.read() result_df = (table.to_pandas() .sort_values(by='index') @@ -735,15 +554,11 @@ def _partition_test_for_filesystem(fs, base_path, use_legacy_dataset=True): .reset_index(drop=True) .reindex(columns=result_df.columns)) - if use_legacy_dataset or Version(pd.__version__) < Version("2.0.0"): - expected_df['foo'] = pd.Categorical(df['foo'], categories=foo_keys) - expected_df['bar'] = pd.Categorical(df['bar'], categories=bar_keys) - else: - # With pandas 2.0.0 Index can store all numeric dtypes (not just - # int64/uint64/float64). Using astype() to create a categorical - # column preserves original dtype (int32) - expected_df['foo'] = expected_df['foo'].astype("category") - expected_df['bar'] = expected_df['bar'].astype("category") + # With pandas 2.0.0 Index can store all numeric dtypes (not just + # int64/uint64/float64). Using astype() to create a categorical + # column preserves original dtype (int32) + expected_df['foo'] = expected_df['foo'].astype("category") + expected_df['bar'] = expected_df['bar'].astype("category") assert (result_df.columns == ['index', 'values', 'foo', 'bar']).all() @@ -790,83 +605,6 @@ def _visit_level(base_dir, level, part_keys): _visit_level(base_dir, 0, []) -def _test_read_common_metadata_files(fs, base_path): - import pandas as pd - - import pyarrow.parquet as pq - - N = 100 - df = pd.DataFrame({ - 'index': np.arange(N), - 'values': np.random.randn(N) - }, columns=['index', 'values']) - - base_path = str(base_path) - data_path = os.path.join(base_path, 'data.parquet') - - table = pa.Table.from_pandas(df) - - with fs.open(data_path, 'wb') as f: - _write_table(table, f) - - metadata_path = os.path.join(base_path, '_common_metadata') - with fs.open(metadata_path, 'wb') as f: - pq.write_metadata(table.schema, f) - - dataset = pq.ParquetDataset(base_path, filesystem=fs, - use_legacy_dataset=True) - with pytest.warns(FutureWarning): - assert dataset.common_metadata_path == str(metadata_path) - - with fs.open(data_path) as f: - common_schema = pq.read_metadata(f).schema - assert dataset.schema.equals(common_schema) - - # handle list of one directory - dataset2 = pq.ParquetDataset([base_path], filesystem=fs, - use_legacy_dataset=True) - assert dataset2.schema.equals(dataset.schema) - - -@pytest.mark.pandas -@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning") -def test_read_common_metadata_files(tempdir): - fs = LocalFileSystem._get_instance() - _test_read_common_metadata_files(fs, tempdir) - - -@pytest.mark.pandas -@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning") -def test_read_metadata_files(tempdir): - fs = LocalFileSystem._get_instance() - - N = 100 - df = pd.DataFrame({ - 'index': np.arange(N), - 'values': np.random.randn(N) - }, columns=['index', 'values']) - - data_path = tempdir / 
'data.parquet' - - table = pa.Table.from_pandas(df) - - with fs.open(data_path, 'wb') as f: - _write_table(table, f) - - metadata_path = tempdir / '_metadata' - with fs.open(metadata_path, 'wb') as f: - pq.write_metadata(table.schema, f) - - dataset = pq.ParquetDataset(tempdir, filesystem=fs, - use_legacy_dataset=True) - with pytest.warns(FutureWarning): - assert dataset.metadata_path == str(metadata_path) - - with fs.open(data_path) as f: - metadata_schema = pq.read_metadata(f).schema - assert dataset.schema.equals(metadata_schema) - - def _filter_partition(df, part_keys): predicate = np.ones(len(df), dtype=bool) @@ -883,9 +621,8 @@ def _filter_partition(df, part_keys): return df[predicate].drop(to_drop, axis=1) -@parametrize_legacy_dataset @pytest.mark.pandas -def test_filter_before_validate_schema(tempdir, use_legacy_dataset): +def test_filter_before_validate_schema(tempdir): # ARROW-4076 apply filter before schema validation # to avoid checking unneeded schemas @@ -902,16 +639,12 @@ def test_filter_before_validate_schema(tempdir, use_legacy_dataset): pq.write_table(table2, dir2 / 'data.parquet') # read single file using filter - table = pq.read_table(tempdir, filters=[[('A', '==', 0)]], - use_legacy_dataset=use_legacy_dataset) + table = pq.read_table(tempdir, filters=[[('A', '==', 0)]]) assert table.column('B').equals(pa.chunked_array([[1, 2, 3]])) @pytest.mark.pandas -@pytest.mark.filterwarnings( - "ignore:Specifying the 'metadata':FutureWarning") -@parametrize_legacy_dataset -def test_read_multiple_files(tempdir, use_legacy_dataset): +def test_read_multiple_files(tempdir): nfiles = 10 size = 5 @@ -938,8 +671,7 @@ def test_read_multiple_files(tempdir, use_legacy_dataset): (dirpath / '_SUCCESS.crc').touch() def read_multiple_files(paths, columns=None, use_threads=True, **kwargs): - dataset = pq.ParquetDataset( - paths, use_legacy_dataset=use_legacy_dataset, **kwargs) + dataset = pq.ParquetDataset(paths, **kwargs) return dataset.read(columns=columns, use_threads=use_threads) result = read_multiple_files(paths) @@ -947,37 +679,18 @@ def read_multiple_files(paths, columns=None, use_threads=True, **kwargs): assert result.equals(expected) - # Read with provided metadata - # TODO(dataset) specifying metadata not yet supported - metadata = pq.read_metadata(paths[0]) - if use_legacy_dataset: - result2 = read_multiple_files(paths, metadata=metadata) - assert result2.equals(expected) - - with pytest.warns(FutureWarning, match="Specifying the 'schema'"): - result3 = pq.ParquetDataset(dirpath, schema=metadata.schema, - use_legacy_dataset=True).read() - assert result3.equals(expected) - else: - with pytest.raises(ValueError, match="no longer supported"): - pq.read_table(paths, metadata=metadata, use_legacy_dataset=False) - # Read column subset to_read = [0, 2, 6, result.num_columns - 1] col_names = [result.field(i).name for i in to_read] - out = pq.read_table( - dirpath, columns=col_names, use_legacy_dataset=use_legacy_dataset - ) + out = pq.read_table(dirpath, columns=col_names) expected = pa.Table.from_arrays([result.column(i) for i in to_read], names=col_names, metadata=result.schema.metadata) assert out.equals(expected) # Read with multiple threads - pq.read_table( - dirpath, use_threads=True, use_legacy_dataset=use_legacy_dataset - ) + pq.read_table(dirpath, use_threads=True) # Test failure modes with non-uniform metadata bad_apple = _test_dataframe(size, seed=i).iloc[:, :4] @@ -986,31 +699,24 @@ def read_multiple_files(paths, columns=None, use_threads=True, **kwargs): t = 
pa.Table.from_pandas(bad_apple) _write_table(t, bad_apple_path) - if not use_legacy_dataset: - # TODO(dataset) Dataset API skips bad files - return + # TODO(dataset) Dataset API skips bad files - bad_meta = pq.read_metadata(bad_apple_path) + # bad_meta = pq.read_metadata(bad_apple_path) - with pytest.raises(ValueError): - read_multiple_files(paths + [bad_apple_path]) + # with pytest.raises(ValueError): + # read_multiple_files(paths + [bad_apple_path]) - with pytest.raises(ValueError): - read_multiple_files(paths, metadata=bad_meta) + # with pytest.raises(ValueError): + # read_multiple_files(paths, metadata=bad_meta) - mixed_paths = [bad_apple_path, paths[0]] + # mixed_paths = [bad_apple_path, paths[0]] - with pytest.raises(ValueError): - with pytest.warns(FutureWarning, match="Specifying the 'schema'"): - read_multiple_files(mixed_paths, schema=bad_meta.schema) - - with pytest.raises(ValueError): - read_multiple_files(mixed_paths) + # with pytest.raises(ValueError): + # read_multiple_files(mixed_paths) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_dataset_read_pandas(tempdir, use_legacy_dataset): +def test_dataset_read_pandas(tempdir): nfiles = 5 size = 5 @@ -1033,7 +739,7 @@ def test_dataset_read_pandas(tempdir, use_legacy_dataset): frames.append(df) paths.append(path) - dataset = pq.ParquetDataset(dirpath, use_legacy_dataset=use_legacy_dataset) + dataset = pq.ParquetDataset(dirpath) columns = ['uint8', 'strings'] result = dataset.read_pandas(columns=columns).to_pandas() expected = pd.concat([x[columns] for x in frames]) @@ -1047,10 +753,8 @@ def test_dataset_read_pandas(tempdir, use_legacy_dataset): tm.assert_frame_equal(result.reindex(columns=expected.columns), expected) -@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning") @pytest.mark.pandas -@parametrize_legacy_dataset -def test_dataset_memory_map(tempdir, use_legacy_dataset): +def test_dataset_memory_map(tempdir): # ARROW-2627: Check that we can use ParquetDataset with memory-mapping dirpath = tempdir / guid() dirpath.mkdir() @@ -1061,15 +765,12 @@ def test_dataset_memory_map(tempdir, use_legacy_dataset): _write_table(table, path, version='2.6') dataset = pq.ParquetDataset( - dirpath, memory_map=True, use_legacy_dataset=use_legacy_dataset) + dirpath, memory_map=True) assert dataset.read().equals(table) - if use_legacy_dataset: - assert dataset.pieces[0].read().equals(table) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_dataset_enable_buffered_stream(tempdir, use_legacy_dataset): +def test_dataset_enable_buffered_stream(tempdir): dirpath = tempdir / guid() dirpath.mkdir() @@ -1080,19 +781,16 @@ def test_dataset_enable_buffered_stream(tempdir, use_legacy_dataset): with pytest.raises(ValueError): pq.ParquetDataset( - dirpath, buffer_size=-64, - use_legacy_dataset=use_legacy_dataset) + dirpath, buffer_size=-64) for buffer_size in [128, 1024]: dataset = pq.ParquetDataset( - dirpath, buffer_size=buffer_size, - use_legacy_dataset=use_legacy_dataset) + dirpath, buffer_size=buffer_size) assert dataset.read().equals(table) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_dataset_enable_pre_buffer(tempdir, use_legacy_dataset): +def test_dataset_enable_pre_buffer(tempdir): dirpath = tempdir / guid() dirpath.mkdir() @@ -1103,11 +801,9 @@ def test_dataset_enable_pre_buffer(tempdir, use_legacy_dataset): for pre_buffer in (True, False): dataset = pq.ParquetDataset( - dirpath, pre_buffer=pre_buffer, - use_legacy_dataset=use_legacy_dataset) + dirpath, pre_buffer=pre_buffer) assert 
dataset.read().equals(table) - actual = pq.read_table(dirpath, pre_buffer=pre_buffer, - use_legacy_dataset=use_legacy_dataset) + actual = pq.read_table(dirpath, pre_buffer=pre_buffer) assert actual.equals(table) @@ -1123,18 +819,14 @@ def _make_example_multifile_dataset(base_path, nfiles=10, file_nrows=5): return paths -def _assert_dataset_paths(dataset, paths, use_legacy_dataset): - if use_legacy_dataset: - assert set(map(str, paths)) == {x.path for x in dataset._pieces} - else: - paths = [str(path.as_posix()) for path in paths] - assert set(paths) == set(dataset._dataset.files) +def _assert_dataset_paths(dataset, paths): + paths = [str(path.as_posix()) for path in paths] + assert set(paths) == set(dataset.files) @pytest.mark.pandas -@parametrize_legacy_dataset @pytest.mark.parametrize('dir_prefix', ['_', '.']) -def test_ignore_private_directories(tempdir, dir_prefix, use_legacy_dataset): +def test_ignore_private_directories(tempdir, dir_prefix): dirpath = tempdir / guid() dirpath.mkdir() @@ -1144,14 +836,13 @@ def test_ignore_private_directories(tempdir, dir_prefix, use_legacy_dataset): # private directory (dirpath / '{}staging'.format(dir_prefix)).mkdir() - dataset = pq.ParquetDataset(dirpath, use_legacy_dataset=use_legacy_dataset) + dataset = pq.ParquetDataset(dirpath) - _assert_dataset_paths(dataset, paths, use_legacy_dataset) + _assert_dataset_paths(dataset, paths) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_ignore_hidden_files_dot(tempdir, use_legacy_dataset): +def test_ignore_hidden_files_dot(tempdir): dirpath = tempdir / guid() dirpath.mkdir() @@ -1164,14 +855,13 @@ def test_ignore_hidden_files_dot(tempdir, use_legacy_dataset): with (dirpath / '.private').open('wb') as f: f.write(b'gibberish') - dataset = pq.ParquetDataset(dirpath, use_legacy_dataset=use_legacy_dataset) + dataset = pq.ParquetDataset(dirpath) - _assert_dataset_paths(dataset, paths, use_legacy_dataset) + _assert_dataset_paths(dataset, paths) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_ignore_hidden_files_underscore(tempdir, use_legacy_dataset): +def test_ignore_hidden_files_underscore(tempdir): dirpath = tempdir / guid() dirpath.mkdir() @@ -1184,17 +874,14 @@ def test_ignore_hidden_files_underscore(tempdir, use_legacy_dataset): with (dirpath / '_started_321').open('wb') as f: f.write(b'abcd') - dataset = pq.ParquetDataset(dirpath, use_legacy_dataset=use_legacy_dataset) + dataset = pq.ParquetDataset(dirpath) - _assert_dataset_paths(dataset, paths, use_legacy_dataset) + _assert_dataset_paths(dataset, paths) @pytest.mark.pandas -@parametrize_legacy_dataset @pytest.mark.parametrize('dir_prefix', ['_', '.']) -def test_ignore_no_private_directories_in_base_path( - tempdir, dir_prefix, use_legacy_dataset -): +def test_ignore_no_private_directories_in_base_path(tempdir, dir_prefix): # ARROW-8427 - don't ignore explicitly listed files if parent directory # is a private directory dirpath = tempdir / "{0}data".format(dir_prefix) / guid() @@ -1203,17 +890,15 @@ def test_ignore_no_private_directories_in_base_path( paths = _make_example_multifile_dataset(dirpath, nfiles=10, file_nrows=5) - dataset = pq.ParquetDataset(paths, use_legacy_dataset=use_legacy_dataset) - _assert_dataset_paths(dataset, paths, use_legacy_dataset) + dataset = pq.ParquetDataset(paths) + _assert_dataset_paths(dataset, paths) # ARROW-9644 - don't ignore full directory with underscore in base path - dataset = pq.ParquetDataset(dirpath, use_legacy_dataset=use_legacy_dataset) - _assert_dataset_paths(dataset, paths, 
use_legacy_dataset) + dataset = pq.ParquetDataset(dirpath) + _assert_dataset_paths(dataset, paths) -@pytest.mark.pandas -@parametrize_legacy_dataset_fixed -def test_ignore_custom_prefixes(tempdir, use_legacy_dataset): +def test_ignore_custom_prefixes(tempdir): # ARROW-9573 - allow override of default ignore_prefixes part = ["xxx"] * 3 + ["yyy"] * 3 table = pa.table([ @@ -1221,7 +906,6 @@ def test_ignore_custom_prefixes(tempdir, use_legacy_dataset): pa.array(part).dictionary_encode(), ], names=['index', '_part']) - # TODO use_legacy_dataset ARROW-10247 pq.write_to_dataset(table, str(tempdir), partition_cols=['_part']) private_duplicate = tempdir / '_private_duplicate' @@ -1230,29 +914,23 @@ def test_ignore_custom_prefixes(tempdir, use_legacy_dataset): partition_cols=['_part']) read = pq.read_table( - tempdir, use_legacy_dataset=use_legacy_dataset, - ignore_prefixes=['_private']) + tempdir, ignore_prefixes=['_private']) assert read.equals(table) -@parametrize_legacy_dataset_fixed -def test_empty_directory(tempdir, use_legacy_dataset): - # ARROW-5310 - reading empty directory - # fails with legacy implementation +def test_empty_directory(tempdir): + # ARROW-5310 empty_dir = tempdir / 'dataset' empty_dir.mkdir() - dataset = pq.ParquetDataset( - empty_dir, use_legacy_dataset=use_legacy_dataset) + dataset = pq.ParquetDataset(empty_dir) result = dataset.read() assert result.num_rows == 0 assert result.num_columns == 0 -@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning") def _test_write_to_dataset_with_partitions(base_path, - use_legacy_dataset=True, filesystem=None, schema=None, index_name=None): @@ -1275,8 +953,7 @@ def _test_write_to_dataset_with_partitions(base_path, output_table = pa.Table.from_pandas(output_df, schema=schema, safe=False, preserve_index=False) pq.write_to_dataset(output_table, base_path, partition_by, - filesystem=filesystem, - use_legacy_dataset=use_legacy_dataset) + filesystem=filesystem) metadata_path = os.path.join(str(base_path), '_common_metadata') @@ -1286,19 +963,11 @@ def _test_write_to_dataset_with_partitions(base_path, else: pq.write_metadata(output_table.schema, metadata_path) - # ARROW-2891: Ensure the output_schema is preserved when writing a - # partitioned dataset dataset = pq.ParquetDataset(base_path, - filesystem=filesystem, - validate_schema=True, - use_legacy_dataset=use_legacy_dataset) + filesystem=filesystem) # ARROW-2209: Ensure the dataset schema also includes the partition columns - if use_legacy_dataset: - with pytest.warns(FutureWarning, match="'ParquetDataset.schema'"): - dataset_cols = set(dataset.schema.to_arrow_schema().names) - else: - # NB schema property is an arrow and not parquet schema - dataset_cols = set(dataset.schema.names) + # NB schema property is an arrow and not parquet schema + dataset_cols = set(dataset.schema.names) assert dataset_cols == set(output_table.schema.names) @@ -1323,7 +992,6 @@ def _test_write_to_dataset_with_partitions(base_path, def _test_write_to_dataset_no_partitions(base_path, - use_legacy_dataset=True, filesystem=None): import pandas as pd @@ -1347,7 +1015,6 @@ def _test_write_to_dataset_no_partitions(base_path, n = 5 for i in range(n): pq.write_to_dataset(output_table, base_path, - use_legacy_dataset=use_legacy_dataset, filesystem=filesystem) output_files = [file for file in filesystem.ls(str(base_path)) if file.endswith(".parquet")] @@ -1356,8 +1023,7 @@ def _test_write_to_dataset_no_partitions(base_path, # Deduplicated incoming DataFrame should match # original outgoing Dataframe 
input_table = pq.ParquetDataset( - base_path, filesystem=filesystem, - use_legacy_dataset=use_legacy_dataset + base_path, filesystem=filesystem ).read() input_df = input_table.to_pandas() input_df = input_df.drop_duplicates() @@ -1366,131 +1032,71 @@ def _test_write_to_dataset_no_partitions(base_path, @pytest.mark.pandas -@parametrize_legacy_dataset -def test_write_to_dataset_with_partitions(tempdir, use_legacy_dataset): - _test_write_to_dataset_with_partitions(str(tempdir), use_legacy_dataset) +def test_write_to_dataset_with_partitions(tempdir): + _test_write_to_dataset_with_partitions(str(tempdir)) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_write_to_dataset_with_partitions_and_schema( - tempdir, use_legacy_dataset -): +def test_write_to_dataset_with_partitions_and_schema(tempdir): schema = pa.schema([pa.field('group1', type=pa.string()), pa.field('group2', type=pa.string()), pa.field('num', type=pa.int64()), pa.field('nan', type=pa.int32()), pa.field('date', type=pa.timestamp(unit='us'))]) _test_write_to_dataset_with_partitions( - str(tempdir), use_legacy_dataset, schema=schema) + str(tempdir), schema=schema) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_write_to_dataset_with_partitions_and_index_name( - tempdir, use_legacy_dataset -): +def test_write_to_dataset_with_partitions_and_index_name(tempdir): _test_write_to_dataset_with_partitions( - str(tempdir), use_legacy_dataset, index_name='index_name') + str(tempdir), index_name='index_name') @pytest.mark.pandas -@parametrize_legacy_dataset -def test_write_to_dataset_no_partitions(tempdir, use_legacy_dataset): - _test_write_to_dataset_no_partitions(str(tempdir), use_legacy_dataset) +def test_write_to_dataset_no_partitions(tempdir): + _test_write_to_dataset_no_partitions(str(tempdir)) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_write_to_dataset_pathlib(tempdir, use_legacy_dataset): - _test_write_to_dataset_with_partitions( - tempdir / "test1", use_legacy_dataset) - _test_write_to_dataset_no_partitions( - tempdir / "test2", use_legacy_dataset) +def test_write_to_dataset_pathlib(tempdir): + _test_write_to_dataset_with_partitions(tempdir / "test1") + _test_write_to_dataset_no_partitions(tempdir / "test2") @pytest.mark.pandas @pytest.mark.s3 -@parametrize_legacy_dataset -def test_write_to_dataset_pathlib_nonlocal( - tempdir, s3_example_s3fs, use_legacy_dataset -): +def test_write_to_dataset_pathlib_nonlocal(tempdir, s3_example_s3fs): # pathlib paths are only accepted for local files fs, _ = s3_example_s3fs with pytest.raises(TypeError, match="path-like objects are only allowed"): _test_write_to_dataset_with_partitions( - tempdir / "test1", use_legacy_dataset, filesystem=fs) + tempdir / "test1", filesystem=fs) with pytest.raises(TypeError, match="path-like objects are only allowed"): _test_write_to_dataset_no_partitions( - tempdir / "test2", use_legacy_dataset, filesystem=fs) + tempdir / "test2", filesystem=fs) @pytest.mark.pandas @pytest.mark.s3 -@parametrize_legacy_dataset -def test_write_to_dataset_with_partitions_s3fs( - s3_example_s3fs, use_legacy_dataset -): +def test_write_to_dataset_with_partitions_s3fs(s3_example_s3fs): fs, path = s3_example_s3fs _test_write_to_dataset_with_partitions( - path, use_legacy_dataset, filesystem=fs) + path, filesystem=fs) @pytest.mark.pandas @pytest.mark.s3 -@parametrize_legacy_dataset -def test_write_to_dataset_no_partitions_s3fs( - s3_example_s3fs, use_legacy_dataset -): +def test_write_to_dataset_no_partitions_s3fs(s3_example_s3fs): fs, path = s3_example_s3fs 
_test_write_to_dataset_no_partitions( - path, use_legacy_dataset, filesystem=fs) + path, filesystem=fs) -@pytest.mark.filterwarnings( - "ignore:'ParquetDataset:FutureWarning", - "ignore:'partition_filename_cb':FutureWarning") -@pytest.mark.pandas -@parametrize_legacy_dataset_not_supported -def test_write_to_dataset_with_partitions_and_custom_filenames( - tempdir, use_legacy_dataset -): - output_df = pd.DataFrame({'group1': list('aaabbbbccc'), - 'group2': list('eefeffgeee'), - 'num': list(range(10)), - 'nan': [np.nan] * 10, - 'date': np.arange('2017-01-01', '2017-01-11', - dtype='datetime64[D]')}) - partition_by = ['group1', 'group2'] - output_table = pa.Table.from_pandas(output_df) - path = str(tempdir) - - def partition_filename_callback(keys): - return "{}-{}.parquet".format(*keys) - - pq.write_to_dataset(output_table, path, - partition_by, partition_filename_callback, - use_legacy_dataset=use_legacy_dataset) - - dataset = pq.ParquetDataset(path, use_legacy_dataset=use_legacy_dataset) - - # ARROW-3538: Ensure partition filenames match the given pattern - # defined in the local function partition_filename_callback - expected_basenames = [ - 'a-e.parquet', 'a-f.parquet', - 'b-e.parquet', 'b-f.parquet', - 'b-g.parquet', 'c-e.parquet' - ] - output_basenames = [os.path.basename(p.path) for p in dataset.pieces] - - assert sorted(expected_basenames) == sorted(output_basenames) - - -@pytest.mark.dataset @pytest.mark.pandas def test_write_to_dataset_filesystem(tempdir): df = pd.DataFrame({'A': [1, 2, 3]}) @@ -1502,7 +1108,7 @@ def test_write_to_dataset_filesystem(tempdir): assert result.equals(table) -def _make_dataset_for_pickling(tempdir, use_legacy_dataset=False, N=100): +def _make_dataset_for_pickling(tempdir, N=100): path = tempdir / 'data.parquet' fs = LocalFileSystem._get_instance() @@ -1525,42 +1131,22 @@ def _make_dataset_for_pickling(tempdir, use_legacy_dataset=False, N=100): pq.write_metadata(table.schema, f) dataset = pq.ParquetDataset( - tempdir, filesystem=fs, use_legacy_dataset=use_legacy_dataset) - if use_legacy_dataset: - with pytest.warns(FutureWarning): - assert dataset.metadata_path == str(metadata_path) + tempdir, filesystem=fs) return dataset @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pickle_dataset(tempdir, datadir, use_legacy_dataset, pickle_module): +def test_pickle_dataset(tempdir, pickle_module): def is_pickleable(obj): return obj == pickle_module.loads(pickle_module.dumps(obj)) - dataset = _make_dataset_for_pickling(tempdir, use_legacy_dataset) + dataset = _make_dataset_for_pickling(tempdir) assert is_pickleable(dataset) - if use_legacy_dataset: - with pytest.warns(FutureWarning): - metadata = dataset.metadata - assert is_pickleable(metadata) - assert is_pickleable(metadata.schema) - assert len(metadata.schema) - for column in metadata.schema: - assert is_pickleable(column) - - for piece in dataset._pieces: - assert is_pickleable(piece) - metadata = piece.get_metadata() - assert metadata.num_row_groups - for i in range(metadata.num_row_groups): - assert is_pickleable(metadata.row_group(i)) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_partitioned_dataset(tempdir, use_legacy_dataset): +def test_partitioned_dataset(tempdir): # ARROW-3208: Segmentation fault when reading a Parquet partitioned dataset # to a Parquet file path = tempdir / "ARROW-3208" @@ -1571,27 +1157,20 @@ def test_partitioned_dataset(tempdir, use_legacy_dataset): }) table = pa.Table.from_pandas(df) pq.write_to_dataset(table, root_path=str(path), - partition_cols=['one', 
'two'], - use_legacy_dataset=use_legacy_dataset) - table = pq.ParquetDataset( - path, use_legacy_dataset=use_legacy_dataset).read() + partition_cols=['one', 'two']) + table = pq.ParquetDataset(path).read() pq.write_table(table, path / "output.parquet") -@pytest.mark.pandas -@parametrize_legacy_dataset -def test_dataset_read_dictionary(tempdir, use_legacy_dataset): +def test_dataset_read_dictionary(tempdir): path = tempdir / "ARROW-3325-dataset" t1 = pa.table([[util.rands(10) for i in range(5)] * 10], names=['f0']) t2 = pa.table([[util.rands(10) for i in range(5)] * 10], names=['f0']) - pq.write_to_dataset(t1, root_path=str(path), - use_legacy_dataset=use_legacy_dataset) - pq.write_to_dataset(t2, root_path=str(path), - use_legacy_dataset=use_legacy_dataset) + pq.write_to_dataset(t1, root_path=str(path)) + pq.write_to_dataset(t2, root_path=str(path)) result = pq.ParquetDataset( - path, read_dictionary=['f0'], - use_legacy_dataset=use_legacy_dataset).read() + path, read_dictionary=['f0']).read() # The order of the chunks is non-deterministic ex_chunks = [t1[0].chunk(0).dictionary_encode(), @@ -1606,9 +1185,6 @@ def test_dataset_read_dictionary(tempdir, use_legacy_dataset): assert c1.equals(ex_chunks[0]) -@pytest.mark.dataset -@pytest.mark.pandas -@pytest.mark.filterwarnings("ignore:Passing 'use_legacy:FutureWarning") def test_read_table_schema(tempdir): # test that schema keyword is passed through in read_table table = pa.table({'a': pa.array([1, 2, 3], pa.int32())}) @@ -1627,42 +1203,24 @@ def test_read_table_schema(tempdir): expected = pa.table({'a': [1, 2, 3, 1, 2, 3]}, schema=schema) assert result.equals(expected) - # don't allow it with the legacy reader - with pytest.raises( - ValueError, match="The 'schema' argument is only supported" - ): - pq.read_table(tempdir / "data.parquet", schema=schema, - use_legacy_dataset=True) - - # using ParquetDataset directory with non-legacy implementation - result = pq.ParquetDataset( - tempdir, schema=schema, use_legacy_dataset=False - ) + result = pq.ParquetDataset(tempdir, schema=schema) expected = pa.table({'a': [1, 2, 3, 1, 2, 3]}, schema=schema) assert result.read().equals(expected) -@pytest.mark.dataset -def test_dataset_unsupported_keywords(): - - with pytest.raises(ValueError, match="not yet supported with the new"): - pq.ParquetDataset("", use_legacy_dataset=False, metadata=pa.schema([])) +def test_read_table_duplicate_column_selection(tempdir): + # test that duplicate column selection gives duplicate columns + table = pa.table({'a': pa.array([1, 2, 3], pa.int32()), + 'b': pa.array([1, 2, 3], pa.uint8())}) + pq.write_table(table, tempdir / "data.parquet") - with pytest.raises(ValueError, match="not yet supported with the new"): - pq.ParquetDataset("", use_legacy_dataset=False, validate_schema=False) + result = pq.read_table(tempdir / "data.parquet", columns=['a', 'a']) + expected_schema = pa.schema([('a', 'int32'), ('a', 'int32')]) - with pytest.raises(ValueError, match="not yet supported with the new"): - pq.ParquetDataset("", use_legacy_dataset=False, split_row_groups=True) + assert result.column_names == ['a', 'a'] + assert result.schema == expected_schema - with pytest.raises(ValueError, match="not yet supported with the new"): - pq.ParquetDataset("", use_legacy_dataset=False, metadata_nthreads=4) - with pytest.raises(ValueError, match="no longer supported"): - pq.read_table("", use_legacy_dataset=False, metadata=pa.schema([])) - - -@pytest.mark.dataset -@pytest.mark.filterwarnings("ignore:Passing 'use_legacy:FutureWarning") def 
test_dataset_partitioning(tempdir): import pyarrow.dataset as ds @@ -1679,42 +1237,25 @@ def test_dataset_partitioning(tempdir): # read_table part = ds.partitioning(field_names=["year", "month", "day"]) result = pq.read_table( - str(root_path), partitioning=part, use_legacy_dataset=False) + str(root_path), partitioning=part) assert result.column_names == ["a", "year", "month", "day"] result = pq.ParquetDataset( - str(root_path), partitioning=part, use_legacy_dataset=False).read() + str(root_path), partitioning=part).read() assert result.column_names == ["a", "year", "month", "day"] - # This raises an error for legacy dataset - with pytest.raises(ValueError): - pq.read_table( - str(root_path), partitioning=part, use_legacy_dataset=True) - - with pytest.raises(ValueError): - pq.ParquetDataset( - str(root_path), partitioning=part, use_legacy_dataset=True) - -@pytest.mark.dataset def test_parquet_dataset_new_filesystem(tempdir): # Ensure we can pass new FileSystem object to ParquetDataset - # (use new implementation automatically without specifying - # use_legacy_dataset=False) table = pa.table({'a': [1, 2, 3]}) pq.write_table(table, tempdir / 'data.parquet') - # don't use simple LocalFileSystem (as that gets mapped to legacy one) filesystem = fs.SubTreeFileSystem(str(tempdir), fs.LocalFileSystem()) dataset = pq.ParquetDataset('.', filesystem=filesystem) result = dataset.read() assert result.equals(table) -@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning") -@parametrize_legacy_dataset -def test_parquet_dataset_partitions_piece_path_with_fsspec( - tempdir, use_legacy_dataset -): +def test_parquet_dataset_partitions_piece_path_with_fsspec(tempdir): # ARROW-10462 ensure that on Windows we properly use posix-style paths # as used by fsspec fsspec = pytest.importorskip("fsspec") @@ -1725,109 +1266,12 @@ def test_parquet_dataset_partitions_piece_path_with_fsspec( # pass a posix-style path (using "/" also on Windows) path = str(tempdir).replace("\\", "/") dataset = pq.ParquetDataset( - path, filesystem=filesystem, use_legacy_dataset=use_legacy_dataset) + path, filesystem=filesystem) # ensure the piece path is also posix-style expected = path + "/data.parquet" - assert dataset.pieces[0].path == expected - - -@pytest.mark.dataset -def test_parquet_dataset_deprecated_properties(tempdir): - table = pa.table({'a': [1, 2, 3]}) - path = tempdir / 'data.parquet' - pq.write_table(table, path) - dataset = pq.ParquetDataset(path, use_legacy_dataset=True) - - with pytest.warns(FutureWarning, match="'ParquetDataset.pieces"): - dataset.pieces - - with pytest.warns(FutureWarning, match="'ParquetDataset.partitions"): - dataset.partitions - - with pytest.warns(FutureWarning, match="'ParquetDataset.memory_map"): - dataset.memory_map - - with pytest.warns(FutureWarning, match="'ParquetDataset.read_dictio"): - dataset.read_dictionary - - with pytest.warns(FutureWarning, match="'ParquetDataset.buffer_size"): - dataset.buffer_size - - with pytest.warns(FutureWarning, match="'ParquetDataset.fs"): - dataset.fs - - with pytest.warns(FutureWarning, match="'ParquetDataset.schema'"): - dataset.schema - - with pytest.warns(FutureWarning, match="'ParquetDataset.common_metadata'"): - dataset.common_metadata - - with pytest.warns(FutureWarning, match="'ParquetDataset.metadata"): - dataset.metadata + assert dataset.fragments[0].path == expected - with pytest.warns(FutureWarning, match="'ParquetDataset.metadata_path"): - dataset.metadata_path - with pytest.warns(FutureWarning, - 
match="'ParquetDataset.common_metadata_path"): - dataset.common_metadata_path - - dataset2 = pq.ParquetDataset(path, use_legacy_dataset=False) - - with pytest.warns(FutureWarning, match="'ParquetDataset.pieces"): - dataset2.pieces - - -@pytest.mark.dataset -def test_parquet_write_to_dataset_deprecated_properties(tempdir): - table = pa.table({'a': [1, 2, 3]}) - path = tempdir / 'data.parquet' - - with pytest.warns(FutureWarning, - match="Passing 'use_legacy_dataset=True'"): - pq.write_to_dataset(table, path, use_legacy_dataset=True) - - # check also that legacy implementation is set when - # partition_filename_cb is specified - with pytest.warns(FutureWarning, - match="Passing 'use_legacy_dataset=True'"): - pq.write_to_dataset(table, path, - partition_filename_cb=lambda x: 'filename.parquet') - - -@pytest.mark.dataset -def test_parquet_write_to_dataset_unsupported_keywords_in_legacy(tempdir): - table = pa.table({'a': [1, 2, 3]}) - path = tempdir / 'data.parquet' - - with pytest.raises(ValueError, match="schema"): - pq.write_to_dataset(table, path, use_legacy_dataset=True, - schema=pa.schema([ - ('a', pa.int32()) - ])) - - with pytest.raises(ValueError, match="partitioning"): - pq.write_to_dataset(table, path, use_legacy_dataset=True, - partitioning=["a"]) - - with pytest.raises(ValueError, match="use_threads"): - pq.write_to_dataset(table, path, use_legacy_dataset=True, - use_threads=False) - - with pytest.raises(ValueError, match="file_visitor"): - pq.write_to_dataset(table, path, use_legacy_dataset=True, - file_visitor=lambda x: x) - - with pytest.raises(ValueError, match="existing_data_behavior"): - pq.write_to_dataset(table, path, use_legacy_dataset=True, - existing_data_behavior='error') - - with pytest.raises(ValueError, match="basename_template"): - pq.write_to_dataset(table, path, use_legacy_dataset=True, - basename_template='part-{i}.parquet') - - -@pytest.mark.dataset def test_parquet_write_to_dataset_exposed_keywords(tempdir): table = pa.table({'a': [1, 2, 3]}) path = tempdir / 'partitioning' @@ -1841,8 +1285,7 @@ def file_visitor(written_file): pq.write_to_dataset(table, path, partitioning=["a"], file_visitor=file_visitor, - basename_template=basename_template, - use_legacy_dataset=False) + basename_template=basename_template) expected_paths = { path / '1' / 'part-0.parquet', @@ -1853,53 +1296,6 @@ def file_visitor(written_file): assert paths_written_set == expected_paths -@pytest.mark.dataset -def test_write_to_dataset_conflicting_keywords(tempdir): - table = pa.table({'a': [1, 2, 3]}) - path = tempdir / 'data.parquet' - - with pytest.raises(ValueError, match="'basename_template' argument " - "is not supported by use_legacy_dataset=True"): - pq.write_to_dataset(table, path, - use_legacy_dataset=True, - partition_filename_cb=lambda x: 'filename.parquet', - basename_template='file-{i}.parquet') - with pytest.raises(ValueError, match="'partition_filename_cb' argument " - "is not supported by use_legacy_dataset=False"): - pq.write_to_dataset(table, path, - use_legacy_dataset=False, - partition_filename_cb=lambda x: 'filename.parquet', - basename_template='file-{i}.parquet') - - with pytest.raises(ValueError, match="'partitioning' argument " - "is not supported by use_legacy_dataset=True"): - pq.write_to_dataset(table, path, - use_legacy_dataset=True, - partition_cols=["a"], - partitioning=["a"]) - - with pytest.raises(ValueError, match="'partition_cols' argument " - "is not supported by use_legacy_dataset=False"): - pq.write_to_dataset(table, path, - use_legacy_dataset=False, - 
partition_cols=["a"], - partitioning=["a"]) - - with pytest.raises(ValueError, match="'file_visitor' argument " - "is not supported by use_legacy_dataset=True"): - pq.write_to_dataset(table, path, - use_legacy_dataset=True, - metadata_collector=[], - file_visitor=lambda x: x) - with pytest.raises(ValueError, match="'metadata_collector' argument " - "is not supported by use_legacy_dataset=False"): - pq.write_to_dataset(table, path, - use_legacy_dataset=False, - metadata_collector=[], - file_visitor=lambda x: x) - - -@pytest.mark.dataset @pytest.mark.parametrize("write_dataset_kwarg", ( ("create_dir", True), ("create_dir", False), @@ -1926,8 +1322,7 @@ def test_write_to_dataset_kwargs_passed(tempdir, write_dataset_kwarg): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_write_to_dataset_category_observed(tempdir, use_legacy_dataset): +def test_write_to_dataset_category_observed(tempdir): # if we partition on a categorical variable with "unobserved" categories # (values present in the dictionary, but not in the actual data) # ensure those are not creating empty files/directories @@ -1938,8 +1333,7 @@ def test_write_to_dataset_category_observed(tempdir, use_legacy_dataset): table = pa.table(df) path = tempdir / "dataset" pq.write_to_dataset( - table, tempdir / "dataset", partition_cols=["cat"], - use_legacy_dataset=use_legacy_dataset + table, tempdir / "dataset", partition_cols=["cat"] ) subdirs = [f.name for f in path.iterdir() if f.is_dir()] assert len(subdirs) == 2 diff --git a/python/pyarrow/tests/parquet/test_datetime.py b/python/pyarrow/tests/parquet/test_datetime.py index f97c451df7ad7..6a9cbd4f73d4f 100644 --- a/python/pyarrow/tests/parquet/test_datetime.py +++ b/python/pyarrow/tests/parquet/test_datetime.py @@ -23,8 +23,7 @@ import pytest import pyarrow as pa -from pyarrow.tests.parquet.common import ( - _check_roundtrip, parametrize_legacy_dataset) +from pyarrow.tests.parquet.common import _check_roundtrip try: import pyarrow.parquet as pq @@ -48,8 +47,7 @@ @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_parquet_datetime_tz(use_legacy_dataset): +def test_pandas_parquet_datetime_tz(): # Pandas v2 defaults to [ns], but Arrow defaults to [us] time units # so we need to cast the pandas dtype. Pandas v1 will always silently # coerce to [ns] due to lack of non-[ns] support. 
@@ -69,21 +67,19 @@ def test_pandas_parquet_datetime_tz(use_legacy_dataset): _write_table(arrow_table, f) f.seek(0) - table_read = pq.read_pandas(f, use_legacy_dataset=use_legacy_dataset) + table_read = pq.read_pandas(f) df_read = table_read.to_pandas() tm.assert_frame_equal(df, df_read) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_datetime_timezone_tzinfo(use_legacy_dataset): +def test_datetime_timezone_tzinfo(): value = datetime.datetime(2018, 1, 1, 1, 23, 45, tzinfo=datetime.timezone.utc) df = pd.DataFrame({'foo': [value]}) - _roundtrip_pandas_dataframe( - df, write_kwargs={}, use_legacy_dataset=use_legacy_dataset) + _roundtrip_pandas_dataframe(df, write_kwargs={}) @pytest.mark.pandas diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py index 0ed305bff1945..f194d12876968 100644 --- a/python/pyarrow/tests/parquet/test_pandas.py +++ b/python/pyarrow/tests/parquet/test_pandas.py @@ -23,8 +23,6 @@ import pyarrow as pa from pyarrow.fs import LocalFileSystem, SubTreeFileSystem -from pyarrow.tests.parquet.common import ( - parametrize_legacy_dataset, parametrize_legacy_dataset_not_supported) from pyarrow.util import guid from pyarrow.vendored.version import Version @@ -101,8 +99,7 @@ def test_merging_parquet_tables_with_different_pandas_metadata(tempdir): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_parquet_column_multiindex(tempdir, use_legacy_dataset): +def test_pandas_parquet_column_multiindex(tempdir): df = alltypes_sample(size=10) df.columns = pd.MultiIndex.from_tuples( list(zip(df.columns, df.columns[::-1])), @@ -115,17 +112,13 @@ def test_pandas_parquet_column_multiindex(tempdir, use_legacy_dataset): _write_table(arrow_table, filename) - table_read = pq.read_pandas( - filename, use_legacy_dataset=use_legacy_dataset) + table_read = pq.read_pandas(filename) df_read = table_read.to_pandas() tm.assert_frame_equal(df, df_read) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_parquet_2_0_roundtrip_read_pandas_no_index_written( - tempdir, use_legacy_dataset -): +def test_pandas_parquet_2_0_roundtrip_read_pandas_no_index_written(tempdir): df = alltypes_sample(size=10000) filename = tempdir / 'pandas_roundtrip.parquet' @@ -137,8 +130,7 @@ def test_pandas_parquet_2_0_roundtrip_read_pandas_no_index_written( assert js['columns'] _write_table(arrow_table, filename) - table_read = pq.read_pandas( - filename, use_legacy_dataset=use_legacy_dataset) + table_read = pq.read_pandas(filename) js = table_read.schema.pandas_metadata assert not js['index_columns'] @@ -150,52 +142,20 @@ def test_pandas_parquet_2_0_roundtrip_read_pandas_no_index_written( tm.assert_frame_equal(df, df_read) -# TODO(dataset) duplicate column selection actually gives duplicate columns now -@pytest.mark.pandas -@parametrize_legacy_dataset_not_supported -def test_pandas_column_selection(tempdir, use_legacy_dataset): - size = 10000 - np.random.seed(0) - df = pd.DataFrame({ - 'uint8': np.arange(size, dtype=np.uint8), - 'uint16': np.arange(size, dtype=np.uint16) - }) - filename = tempdir / 'pandas_roundtrip.parquet' - arrow_table = pa.Table.from_pandas(df) - _write_table(arrow_table, filename) - table_read = _read_table( - filename, columns=['uint8'], use_legacy_dataset=use_legacy_dataset) - df_read = table_read.to_pandas() - - tm.assert_frame_equal(df[['uint8']], df_read) - - # ARROW-4267: Selection of duplicate columns still leads to these columns - # being read uniquely. 
- table_read = _read_table( - filename, columns=['uint8', 'uint8'], - use_legacy_dataset=use_legacy_dataset) - df_read = table_read.to_pandas() - - tm.assert_frame_equal(df[['uint8']], df_read) - - @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_parquet_native_file_roundtrip(tempdir, use_legacy_dataset): +def test_pandas_parquet_native_file_roundtrip(): df = _test_dataframe(10000) arrow_table = pa.Table.from_pandas(df) imos = pa.BufferOutputStream() _write_table(arrow_table, imos, version='2.6') buf = imos.getvalue() reader = pa.BufferReader(buf) - df_read = _read_table( - reader, use_legacy_dataset=use_legacy_dataset).to_pandas() + df_read = _read_table(reader).to_pandas() tm.assert_frame_equal(df, df_read) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_read_pandas_column_subset(tempdir, use_legacy_dataset): +def test_read_pandas_column_subset(): df = _test_dataframe(10000) arrow_table = pa.Table.from_pandas(df) imos = pa.BufferOutputStream() @@ -204,27 +164,24 @@ def test_read_pandas_column_subset(tempdir, use_legacy_dataset): reader = pa.BufferReader(buf) df_read = pq.read_pandas( reader, columns=['strings', 'uint8'], - use_legacy_dataset=use_legacy_dataset ).to_pandas() tm.assert_frame_equal(df[['strings', 'uint8']], df_read) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_parquet_empty_roundtrip(tempdir, use_legacy_dataset): +def test_pandas_parquet_empty_roundtrip(): df = _test_dataframe(0) arrow_table = pa.Table.from_pandas(df) imos = pa.BufferOutputStream() _write_table(arrow_table, imos, version='2.6') buf = imos.getvalue() reader = pa.BufferReader(buf) - df_read = _read_table( - reader, use_legacy_dataset=use_legacy_dataset).to_pandas() + df_read = _read_table(reader).to_pandas() tm.assert_frame_equal(df, df_read) @pytest.mark.pandas -def test_pandas_can_write_nested_data(tempdir): +def test_pandas_can_write_nested_data(): data = { "agg_col": [ {"page_type": 1}, @@ -241,8 +198,7 @@ def test_pandas_can_write_nested_data(tempdir): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_parquet_pyfile_roundtrip(tempdir, use_legacy_dataset): +def test_pandas_parquet_pyfile_roundtrip(tempdir): filename = tempdir / 'pandas_pyfile_roundtrip.parquet' size = 5 df = pd.DataFrame({ @@ -260,14 +216,13 @@ def test_pandas_parquet_pyfile_roundtrip(tempdir, use_legacy_dataset): data = io.BytesIO(filename.read_bytes()) - table_read = _read_table(data, use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(data) df_read = table_read.to_pandas() tm.assert_frame_equal(df, df_read) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_parquet_configuration_options(tempdir, use_legacy_dataset): +def test_pandas_parquet_configuration_options(tempdir): size = 10000 np.random.seed(0) df = pd.DataFrame({ @@ -289,16 +244,14 @@ def test_pandas_parquet_configuration_options(tempdir, use_legacy_dataset): for use_dictionary in [True, False]: _write_table(arrow_table, filename, version='2.6', use_dictionary=use_dictionary) - table_read = _read_table( - filename, use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(filename) df_read = table_read.to_pandas() tm.assert_frame_equal(df, df_read) for write_statistics in [True, False]: _write_table(arrow_table, filename, version='2.6', write_statistics=write_statistics) - table_read = _read_table(filename, - use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(filename) df_read = table_read.to_pandas() tm.assert_frame_equal(df, df_read) @@ -308,8 +261,7 @@ def 
test_pandas_parquet_configuration_options(tempdir, use_legacy_dataset): continue _write_table(arrow_table, filename, version='2.6', compression=compression) - table_read = _read_table( - filename, use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(filename) df_read = table_read.to_pandas() tm.assert_frame_equal(df, df_read) @@ -327,8 +279,7 @@ def test_spark_flavor_preserves_pandas_metadata(): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_index_column_name_duplicate(tempdir, use_legacy_dataset): +def test_index_column_name_duplicate(tempdir): data = { 'close': { pd.Timestamp('2017-06-30 01:31:00'): 154.99958999999998, @@ -352,14 +303,13 @@ def test_index_column_name_duplicate(tempdir, use_legacy_dataset): tdfx = pa.Table.from_pandas(dfx) _write_table(tdfx, path) - arrow_table = _read_table(path, use_legacy_dataset=use_legacy_dataset) + arrow_table = _read_table(path) result_df = arrow_table.to_pandas() tm.assert_frame_equal(result_df, dfx) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_multiindex_duplicate_values(tempdir, use_legacy_dataset): +def test_multiindex_duplicate_values(tempdir): num_rows = 3 numbers = list(range(num_rows)) index = pd.MultiIndex.from_arrays( @@ -373,7 +323,7 @@ def test_multiindex_duplicate_values(tempdir, use_legacy_dataset): filename = tempdir / 'dup_multi_index_levels.parquet' _write_table(table, filename) - result_table = _read_table(filename, use_legacy_dataset=use_legacy_dataset) + result_table = _read_table(filename) assert table.equals(result_table) result_df = result_table.to_pandas() @@ -381,8 +331,7 @@ def test_multiindex_duplicate_values(tempdir, use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_backwards_compatible_index_naming(datadir, use_legacy_dataset): +def test_backwards_compatible_index_naming(datadir): expected_string = b"""\ carat cut color clarity depth table price x y z 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43 @@ -397,17 +346,13 @@ def test_backwards_compatible_index_naming(datadir, use_legacy_dataset): 0.23 Very Good H VS1 59.4 61.0 338 4.00 4.05 2.39""" expected = pd.read_csv(io.BytesIO(expected_string), sep=r'\s{2,}', index_col=None, header=0, engine='python') - table = _read_table( - datadir / 'v0.7.1.parquet', use_legacy_dataset=use_legacy_dataset) + table = _read_table(datadir / 'v0.7.1.parquet') result = table.to_pandas() tm.assert_frame_equal(result, expected) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_backwards_compatible_index_multi_level_named( - datadir, use_legacy_dataset -): +def test_backwards_compatible_index_multi_level_named(datadir): expected_string = b"""\ carat cut color clarity depth table price x y z 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43 @@ -426,17 +371,13 @@ def test_backwards_compatible_index_multi_level_named( header=0, engine='python' ).sort_index() - table = _read_table(datadir / 'v0.7.1.all-named-index.parquet', - use_legacy_dataset=use_legacy_dataset) + table = _read_table(datadir / 'v0.7.1.all-named-index.parquet') result = table.to_pandas() tm.assert_frame_equal(result, expected) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_backwards_compatible_index_multi_level_some_named( - datadir, use_legacy_dataset -): +def test_backwards_compatible_index_multi_level_some_named(datadir): expected_string = b"""\ carat cut color clarity depth table price x y z 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43 @@ -456,17 +397,13 @@ def test_backwards_compatible_index_multi_level_some_named( ).sort_index() 
expected.index = expected.index.set_names(['cut', None, 'clarity']) - table = _read_table(datadir / 'v0.7.1.some-named-index.parquet', - use_legacy_dataset=use_legacy_dataset) + table = _read_table(datadir / 'v0.7.1.some-named-index.parquet') result = table.to_pandas() tm.assert_frame_equal(result, expected) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_backwards_compatible_column_metadata_handling( - datadir, use_legacy_dataset -): +def test_backwards_compatible_column_metadata_handling(datadir): expected = pd.DataFrame( {'a': [1, 2, 3], 'b': [.1, .2, .3], 'c': pd.date_range("2017-01-01", periods=3, tz='Europe/Brussels')}) @@ -476,19 +413,18 @@ def test_backwards_compatible_column_metadata_handling( names=['index', None]) path = datadir / 'v0.7.1.column-metadata-handling.parquet' - table = _read_table(path, use_legacy_dataset=use_legacy_dataset) + table = _read_table(path) result = table.to_pandas() tm.assert_frame_equal(result, expected) table = _read_table( - path, columns=['a'], use_legacy_dataset=use_legacy_dataset) + path, columns=['a']) result = table.to_pandas() tm.assert_frame_equal(result, expected[['a']].reset_index(drop=True)) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_categorical_index_survives_roundtrip(use_legacy_dataset): +def test_categorical_index_survives_roundtrip(): # ARROW-3652, addressed by ARROW-3246 df = pd.DataFrame([['a', 'b'], ['c', 'd']], columns=['c1', 'c2']) df['c1'] = df['c1'].astype('category') @@ -497,15 +433,13 @@ def test_categorical_index_survives_roundtrip(use_legacy_dataset): table = pa.Table.from_pandas(df) bos = pa.BufferOutputStream() pq.write_table(table, bos) - ref_df = pq.read_pandas( - bos.getvalue(), use_legacy_dataset=use_legacy_dataset).to_pandas() + ref_df = pq.read_pandas(bos.getvalue()).to_pandas() assert isinstance(ref_df.index, pd.CategoricalIndex) assert ref_df.index.equals(df.index) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_categorical_order_survives_roundtrip(use_legacy_dataset): +def test_categorical_order_survives_roundtrip(): # ARROW-6302 df = pd.DataFrame({"a": pd.Categorical( ["a", "b", "c", "a"], categories=["b", "c", "d"], ordered=True)}) @@ -515,15 +449,13 @@ def test_categorical_order_survives_roundtrip(use_legacy_dataset): pq.write_table(table, bos) contents = bos.getvalue() - result = pq.read_pandas( - contents, use_legacy_dataset=use_legacy_dataset).to_pandas() + result = pq.read_pandas(contents).to_pandas() tm.assert_frame_equal(result, df) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_categorical_na_type_row_groups(use_legacy_dataset): +def test_pandas_categorical_na_type_row_groups(): # ARROW-5085 df = pd.DataFrame({"col": [None] * 100, "int": [1.0] * 100}) df_category = df.astype({"col": "category", "int": "category"}) @@ -533,8 +465,7 @@ def test_pandas_categorical_na_type_row_groups(use_legacy_dataset): # it works pq.write_table(table_cat, buf, version='2.6', chunk_size=10) - result = pq.read_table( - buf.getvalue(), use_legacy_dataset=use_legacy_dataset) + result = pq.read_table(buf.getvalue()) # Result is non-categorical assert result[0].equals(table[0]) @@ -542,8 +473,7 @@ def test_pandas_categorical_na_type_row_groups(use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_categorical_roundtrip(use_legacy_dataset): +def test_pandas_categorical_roundtrip(): # ARROW-5480, this was enabled by ARROW-3246 # Have one of the categories unobserved and include a null (-1) @@ -555,8 +485,7 @@ def 
test_pandas_categorical_roundtrip(use_legacy_dataset): buf = pa.BufferOutputStream() pq.write_table(pa.table(df), buf) - result = pq.read_table( - buf.getvalue(), use_legacy_dataset=use_legacy_dataset).to_pandas() + result = pq.read_table(buf.getvalue()).to_pandas() assert result.x.dtype == 'category' assert (result.x.cat.categories == categories).all() tm.assert_frame_equal(result, df) @@ -587,41 +516,28 @@ def test_categories_with_string_pyarrow_dtype(tempdir): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_write_to_dataset_pandas_preserve_extensiondtypes( - tempdir, use_legacy_dataset -): +def test_write_to_dataset_pandas_preserve_extensiondtypes(tempdir): df = pd.DataFrame({'part': 'a', "col": [1, 2, 3]}) df['col'] = df['col'].astype("Int64") table = pa.table(df) pq.write_to_dataset( table, str(tempdir / "case1"), partition_cols=['part'], - use_legacy_dataset=use_legacy_dataset ) - result = pq.read_table( - str(tempdir / "case1"), use_legacy_dataset=use_legacy_dataset - ).to_pandas() + result = pq.read_table(str(tempdir / "case1")).to_pandas() tm.assert_frame_equal(result[["col"]], df[["col"]]) - pq.write_to_dataset( - table, str(tempdir / "case2"), use_legacy_dataset=use_legacy_dataset - ) - result = pq.read_table( - str(tempdir / "case2"), use_legacy_dataset=use_legacy_dataset - ).to_pandas() + pq.write_to_dataset(table, str(tempdir / "case2")) + result = pq.read_table(str(tempdir / "case2")).to_pandas() tm.assert_frame_equal(result[["col"]], df[["col"]]) pq.write_table(table, str(tempdir / "data.parquet")) - result = pq.read_table( - str(tempdir / "data.parquet"), use_legacy_dataset=use_legacy_dataset - ).to_pandas() + result = pq.read_table(str(tempdir / "data.parquet")).to_pandas() tm.assert_frame_equal(result[["col"]], df[["col"]]) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_write_to_dataset_pandas_preserve_index(tempdir, use_legacy_dataset): +def test_write_to_dataset_pandas_preserve_index(tempdir): # ARROW-8251 - preserve pandas index in roundtrip df = pd.DataFrame({'part': ['a', 'a', 'b'], "col": [1, 2, 3]}) @@ -632,34 +548,24 @@ def test_write_to_dataset_pandas_preserve_index(tempdir, use_legacy_dataset): pq.write_to_dataset( table, str(tempdir / "case1"), partition_cols=['part'], - use_legacy_dataset=use_legacy_dataset ) - result = pq.read_table( - str(tempdir / "case1"), use_legacy_dataset=use_legacy_dataset - ).to_pandas() + result = pq.read_table(str(tempdir / "case1")).to_pandas() tm.assert_frame_equal(result, df_cat) - pq.write_to_dataset( - table, str(tempdir / "case2"), use_legacy_dataset=use_legacy_dataset - ) - result = pq.read_table( - str(tempdir / "case2"), use_legacy_dataset=use_legacy_dataset - ).to_pandas() + pq.write_to_dataset(table, str(tempdir / "case2")) + result = pq.read_table(str(tempdir / "case2")).to_pandas() tm.assert_frame_equal(result, df) pq.write_table(table, str(tempdir / "data.parquet")) - result = pq.read_table( - str(tempdir / "data.parquet"), use_legacy_dataset=use_legacy_dataset - ).to_pandas() + result = pq.read_table(str(tempdir / "data.parquet")).to_pandas() tm.assert_frame_equal(result, df) @pytest.mark.pandas -@parametrize_legacy_dataset @pytest.mark.parametrize('preserve_index', [True, False, None]) @pytest.mark.parametrize('metadata_fname', ["_metadata", "_common_metadata"]) def test_dataset_read_pandas_common_metadata( - tempdir, use_legacy_dataset, preserve_index, metadata_fname + tempdir, preserve_index, metadata_fname ): # ARROW-1103 nfiles = 5 @@ -696,7 +602,7 @@ def 
test_dataset_read_pandas_common_metadata( ) pq.write_metadata(table_for_metadata.schema, dirpath / metadata_fname) - dataset = pq.ParquetDataset(dirpath, use_legacy_dataset=use_legacy_dataset) + dataset = pq.ParquetDataset(dirpath) columns = ['uint8', 'strings'] result = dataset.read_pandas(columns=columns).to_pandas() expected = pd.concat([x[columns] for x in frames]) diff --git a/python/pyarrow/tests/parquet/test_parquet_file.py b/python/pyarrow/tests/parquet/test_parquet_file.py index 9f920206a107e..93097a1afaac9 100644 --- a/python/pyarrow/tests/parquet/test_parquet_file.py +++ b/python/pyarrow/tests/parquet/test_parquet_file.py @@ -18,7 +18,6 @@ import io import os import sys -from unittest import mock import pytest @@ -296,28 +295,6 @@ def test_parquet_file_explicitly_closed(tempdir): table = pa.table({'col1': [0, 1], 'col2': [0, 1]}) pq.write_table(table, fn) - # read_table (legacy) with opened file (will leave open) - with open(fn, 'rb') as f: - pq.read_table(f, use_legacy_dataset=True) - assert not f.closed # Didn't close it internally after read_table - - # read_table (legacy) with unopened file (will close) - with mock.patch.object(pq.ParquetFile, "close") as mock_close: - pq.read_table(fn, use_legacy_dataset=True) - mock_close.assert_called() - - # ParquetDataset test (legacy) with unopened file (will close) - with mock.patch.object(pq.ParquetFile, "close") as mock_close: - pq.ParquetDataset(fn, use_legacy_dataset=True).read() - mock_close.assert_called() - - # ParquetDataset test (legacy) with opened file (will leave open) - with open(fn, 'rb') as f: - # ARROW-8075: support ParquetDataset from file-like, not just path-like - with pytest.raises(TypeError, match='not a path-like object'): - pq.ParquetDataset(f, use_legacy_dataset=True).read() - assert not f.closed - # ParquetFile with opened file (will leave open) with open(fn, 'rb') as f: with pq.ParquetFile(f) as p: @@ -338,7 +315,7 @@ def test_parquet_file_explicitly_closed(tempdir): @pytest.mark.s3 @pytest.mark.parametrize("use_uri", (True, False)) -def test_parquet_file_with_filesystem(tempdir, s3_example_fs, use_uri): +def test_parquet_file_with_filesystem(s3_example_fs, use_uri): s3_fs, s3_uri, s3_path = s3_example_fs args = (s3_uri if use_uri else s3_path,) diff --git a/python/pyarrow/tests/parquet/test_parquet_writer.py b/python/pyarrow/tests/parquet/test_parquet_writer.py index b902541015aa2..16584684f5c7f 100644 --- a/python/pyarrow/tests/parquet/test_parquet_writer.py +++ b/python/pyarrow/tests/parquet/test_parquet_writer.py @@ -20,7 +20,6 @@ import pyarrow as pa from pyarrow import fs from pyarrow.filesystem import FileSystem, LocalFileSystem -from pyarrow.tests.parquet.common import parametrize_legacy_dataset try: import pyarrow.parquet as pq @@ -44,8 +43,7 @@ @pytest.mark.pandas -@parametrize_legacy_dataset -def test_parquet_incremental_file_build(tempdir, use_legacy_dataset): +def test_parquet_incremental_file_build(tempdir): df = _test_dataframe(100) df['unique_id'] = 0 @@ -65,8 +63,7 @@ def test_parquet_incremental_file_build(tempdir, use_legacy_dataset): writer.close() buf = out.getvalue() - result = _read_table( - pa.BufferReader(buf), use_legacy_dataset=use_legacy_dataset) + result = _read_table(pa.BufferReader(buf)) expected = pd.concat(frames, ignore_index=True) tm.assert_frame_equal(result.to_pandas(), expected) @@ -105,8 +102,7 @@ def test_parquet_invalid_writer(tempdir): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_parquet_writer_context_obj(tempdir, use_legacy_dataset): +def 
test_parquet_writer_context_obj(tempdir): df = _test_dataframe(100) df['unique_id'] = 0 @@ -124,18 +120,14 @@ def test_parquet_writer_context_obj(tempdir, use_legacy_dataset): frames.append(df.copy()) buf = out.getvalue() - result = _read_table( - pa.BufferReader(buf), use_legacy_dataset=use_legacy_dataset) + result = _read_table(pa.BufferReader(buf)) expected = pd.concat(frames, ignore_index=True) tm.assert_frame_equal(result.to_pandas(), expected) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_parquet_writer_context_obj_with_exception( - tempdir, use_legacy_dataset -): +def test_parquet_writer_context_obj_with_exception(tempdir): df = _test_dataframe(100) df['unique_id'] = 0 @@ -160,8 +152,7 @@ def test_parquet_writer_context_obj_with_exception( assert str(e) == error_text buf = out.getvalue() - result = _read_table( - pa.BufferReader(buf), use_legacy_dataset=use_legacy_dataset) + result = _read_table(pa.BufferReader(buf)) expected = pd.concat(frames, ignore_index=True) tm.assert_frame_equal(result.to_pandas(), expected) @@ -340,8 +331,7 @@ def test_parquet_writer_filesystem_buffer_raises(): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_parquet_writer_with_caller_provided_filesystem(use_legacy_dataset): +def test_parquet_writer_with_caller_provided_filesystem(): out = pa.BufferOutputStream() class CustomFS(FileSystem): @@ -368,8 +358,7 @@ def open(self, path, mode='rb'): assert out.closed buf = out.getvalue() - table_read = _read_table( - pa.BufferReader(buf), use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(pa.BufferReader(buf)) df_read = table_read.to_pandas() tm.assert_frame_equal(df_read, df) diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index a37eb1e426f7a..e2bb4400c8bde 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -1148,7 +1148,6 @@ def _create_dataset_for_fragments(tempdir, chunk_size=None, filesystem=None): path = str(tempdir / "test_parquet_dataset") - # write_to_dataset currently requires pandas pq.write_to_dataset(table, path, partition_cols=["part"], chunk_size=chunk_size) dataset = ds.dataset( @@ -1158,10 +1157,7 @@ def _create_dataset_for_fragments(tempdir, chunk_size=None, filesystem=None): return table, dataset -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments(tempdir, dataset_reader): table, dataset = _create_dataset_for_fragments(tempdir) @@ -1208,10 +1204,7 @@ def test_fragments_implicit_cast(tempdir): assert len(list(fragments)) == 1 -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments_reconstruct(tempdir, dataset_reader, pickle_module): table, dataset = _create_dataset_for_fragments(tempdir) @@ -1272,10 +1265,7 @@ def assert_yields_projected(fragment, row_slice, dataset_reader.to_table(new_fragment, filter=ds.field('part') == 'a') -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments_parquet_row_groups(tempdir, dataset_reader): table, dataset = _create_dataset_for_fragments(tempdir, chunk_size=2) @@ -1326,8 +1316,6 @@ def test_fragments_parquet_num_row_groups(tempdir): @pytest.mark.pandas @pytest.mark.parquet def test_fragments_parquet_row_groups_dictionary(tempdir, dataset_reader): - import pandas as pd - df = pd.DataFrame(dict(col1=['a', 
'b'], col2=[1, 2])) df['col1'] = df['col1'].astype("category") @@ -1340,10 +1328,7 @@ def test_fragments_parquet_row_groups_dictionary(tempdir, dataset_reader): assert (df.iloc[0] == result.to_pandas()).all().all() -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments_parquet_ensure_metadata(tempdir, open_logging_fs, pickle_module): fs, assert_opens = open_logging_fs _, dataset = _create_dataset_for_fragments( @@ -1384,7 +1369,6 @@ def test_fragments_parquet_ensure_metadata(tempdir, open_logging_fs, pickle_modu assert row_group.statistics is not None -@pytest.mark.pandas @pytest.mark.parquet def test_fragments_parquet_pickle_no_metadata(tempdir, open_logging_fs, pickle_module): # https://issues.apache.org/jira/browse/ARROW-15796 @@ -1454,16 +1438,13 @@ def _create_dataset_all_types(tempdir, chunk_size=None): path = str(tempdir / "test_parquet_dataset_all_types") # write_to_dataset currently requires pandas - pq.write_to_dataset(table, path, use_legacy_dataset=True, - chunk_size=chunk_size) + pq.write_to_dataset(table, path, chunk_size=chunk_size) return table, ds.dataset(path, format="parquet", partitioning="hive") @pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_parquet_fragment_statistics(tempdir): table, dataset = _create_dataset_all_types(tempdir) @@ -1529,10 +1510,7 @@ def test_parquet_empty_row_group_statistics(tempdir): assert fragments[0].row_groups[0].statistics == {} -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments_parquet_row_groups_predicate(tempdir): table, dataset = _create_dataset_for_fragments(tempdir, chunk_size=2) @@ -1555,10 +1533,7 @@ def test_fragments_parquet_row_groups_predicate(tempdir): assert len(row_group_fragments) == 0 -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments_parquet_row_groups_reconstruct(tempdir, dataset_reader, pickle_module): table, dataset = _create_dataset_for_fragments(tempdir, chunk_size=2) @@ -1600,10 +1575,7 @@ def test_fragments_parquet_row_groups_reconstruct(tempdir, dataset_reader, dataset_reader.to_table(new_fragment) -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments_parquet_subset_ids(tempdir, open_logging_fs, dataset_reader): fs, assert_opens = open_logging_fs @@ -1631,10 +1603,7 @@ def test_fragments_parquet_subset_ids(tempdir, open_logging_fs, assert result.equals(table[:0]) -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments_parquet_subset_filter(tempdir, open_logging_fs, dataset_reader): fs, assert_opens = open_logging_fs @@ -1666,10 +1635,7 @@ def test_fragments_parquet_subset_filter(tempdir, open_logging_fs, assert subfrag.num_row_groups == 4 -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments_parquet_subset_invalid(tempdir): _, dataset = _create_dataset_for_fragments(tempdir, chunk_size=1) fragment = list(dataset.get_fragments())[0] @@ -3591,10 +3557,7 @@ def test_parquet_dataset_factory_fsspec(tempdir): @pytest.mark.parquet @pytest.mark.pandas # 
write_to_dataset currently requires pandas -@pytest.mark.parametrize('use_legacy_dataset', [False, True]) -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") -def test_parquet_dataset_factory_roundtrip(tempdir, use_legacy_dataset): +def test_parquet_dataset_factory_roundtrip(tempdir): # Simple test to ensure we can roundtrip dataset to # _metadata/common_metadata and back. A more complex test # using partitioning will have to wait for ARROW-13269. The @@ -3606,7 +3569,6 @@ def test_parquet_dataset_factory_roundtrip(tempdir, use_legacy_dataset): metadata_collector = [] pq.write_to_dataset( table, str(root_path), metadata_collector=metadata_collector, - use_legacy_dataset=use_legacy_dataset ) metadata_path = str(root_path / '_metadata') # write _metadata file @@ -3820,7 +3782,6 @@ def test_dataset_project_only_partition_columns(tempdir, dataset_reader): @pytest.mark.parquet @pytest.mark.pandas def test_dataset_project_null_column(tempdir, dataset_reader): - import pandas as pd df = pd.DataFrame({"col": np.array([None, None, None], dtype='object')}) f = tempdir / "test_dataset_project_null_column.parquet" @@ -3930,8 +3891,7 @@ def test_write_to_dataset_given_null_just_works(tempdir): 'col': list(range(4))}, schema=schema) path = str(tempdir / 'test_dataset') - pq.write_to_dataset(table, path, partition_cols=[ - 'part'], use_legacy_dataset=False) + pq.write_to_dataset(table, path, partition_cols=['part']) actual_table = pq.read_table(tempdir / 'test_dataset') # column.equals can handle the difference in chunking but not the fact @@ -3941,28 +3901,6 @@ def test_write_to_dataset_given_null_just_works(tempdir): assert actual_table.column('col').equals(table.column('col')) -@pytest.mark.parquet -@pytest.mark.pandas -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") -def test_legacy_write_to_dataset_drops_null(tempdir): - schema = pa.schema([ - pa.field('col', pa.int64()), - pa.field('part', pa.dictionary(pa.int32(), pa.string())) - ]) - table = pa.table({'part': ['a', 'a', None, None], - 'col': list(range(4))}, schema=schema) - expected = pa.table( - {'part': ['a', 'a'], 'col': list(range(2))}, schema=schema) - - path = str(tempdir / 'test_dataset') - pq.write_to_dataset(table, path, partition_cols=[ - 'part'], use_legacy_dataset=True) - - actual = pq.read_table(tempdir / 'test_dataset') - assert actual == expected - - def _sort_table(tab, sort_col): import pyarrow.compute as pc sorted_indices = pc.sort_indices( diff --git a/python/pyarrow/tests/test_hdfs.py b/python/pyarrow/tests/test_hdfs.py index 511dbf9a1c4e1..5b94c200f35de 100644 --- a/python/pyarrow/tests/test_hdfs.py +++ b/python/pyarrow/tests/test_hdfs.py @@ -27,7 +27,7 @@ from pyarrow.tests import util from pyarrow.tests.parquet.common import _test_dataframe from pyarrow.tests.parquet.test_dataset import ( - _test_read_common_metadata_files, _test_write_to_dataset_with_partitions, + _test_write_to_dataset_with_partitions, _test_write_to_dataset_no_partitions ) from pyarrow.util import guid @@ -309,6 +309,9 @@ def _write_multiple_hdfs_pq_files(self, tmpdir): expected = pa.concat_tables(test_data) return expected + @pytest.mark.xfail(reason="legacy.FileSystem not supported with ParquetDataset " + "due to legacy path being removed in PyArrow 15.0.0.", + raises=TypeError) @pytest.mark.pandas @pytest.mark.parquet def test_read_multiple_parquet_files(self): @@ -343,6 +346,9 @@ def test_read_multiple_parquet_files_with_uri(self): expected.to_pandas() ) + 
@pytest.mark.xfail(reason="legacy.FileSystem not supported with ParquetDataset " + "due to legacy path being removed in PyArrow 15.0.0.", + raises=TypeError) @pytest.mark.pandas @pytest.mark.parquet def test_read_write_parquet_files_with_uri(self): @@ -360,19 +366,13 @@ def test_read_write_parquet_files_with_uri(self): pq.write_table(table, path, filesystem=self.hdfs) - result = pq.read_table( - path, filesystem=self.hdfs, use_legacy_dataset=True - ).to_pandas() + result = pq.read_table(path, filesystem=self.hdfs).to_pandas() assert_frame_equal(result, df) - @pytest.mark.parquet - @pytest.mark.pandas - def test_read_common_metadata_files(self): - tmpdir = pjoin(self.tmp_path, 'common-metadata-' + guid()) - self.hdfs.mkdir(tmpdir) - _test_read_common_metadata_files(self.hdfs, tmpdir) - + @pytest.mark.xfail(reason="legacy.FileSystem not supported with ParquetDataset " + "due to legacy path being removed in PyArrow 15.0.0.", + raises=TypeError) @pytest.mark.parquet @pytest.mark.pandas def test_write_to_dataset_with_partitions(self): @@ -381,6 +381,9 @@ def test_write_to_dataset_with_partitions(self): _test_write_to_dataset_with_partitions( tmpdir, filesystem=self.hdfs) + @pytest.mark.xfail(reason="legacy.FileSystem not supported with ParquetDataset " + "due to legacy path being removed in PyArrow 15.0.0.", + raises=TypeError) @pytest.mark.parquet @pytest.mark.pandas def test_write_to_dataset_no_partitions(self): From 2abb3fb7095241300e2bb2aadd953b0f23970237 Mon Sep 17 00:00:00 2001 From: "Rossi(Ruoxi) Sun" Date: Thu, 21 Dec 2023 14:14:45 -0800 Subject: [PATCH 090/570] GH-32570: [C++] Fix the issue of `ExecBatchBuilder` when appending consecutive tail rows with the same id may exceed buffer boundary (#39234) ### Rationale for this change Addressed in https://github.com/apache/arrow/issues/32570#issuecomment-1856473812 ### What changes are included in this PR? 1. Skip consecutive rows with the same id when calculating rows to skip when appending to `ExecBatchBuilder`. 2. Fix the bug that column offset is neglected when calculating rows to skip. ### Are these changes tested? Yes. New UT included and the change is also protected by the existing case mentioned in the issue. ### Are there any user-facing changes? No. **This PR contains a "Critical Fix".** Because #32570 is labeled critical, and causes a crash even when the API contract is upheld. 
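To make the duplicate-row case concrete, here is a rough, self-contained sketch of the fixed skip logic (it mirrors the loop body changed in `light_array.cc` but is not Arrow code). The offsets correspond to the data used by the new unit test: three 16-byte values, a 7-byte value and a 9-byte value in a 64-byte buffer. The outer loop condition and the 8-byte tail threshold are assumptions made only for this illustration.

```cpp
#include <cstdint>
#include <iostream>

int main() {
  // Offsets of the five binary values; the 9-byte tail row ends exactly at
  // the 64-byte buffer capacity.
  const int32_t offsets[] = {0, 16, 32, 48, 55, 64};
  const uint16_t row_ids[] = {4, 4};       // the same tail row selected twice
  const int num_tail_bytes_to_skip = 8;    // e.g. one machine word (assumed)

  int num_rows_left = 2;
  int num_bytes_skipped = 0;
  while (num_rows_left > 0 && num_bytes_skipped < num_tail_bytes_to_skip) {
    --num_rows_left;
    const int row_id_removed = row_ids[num_rows_left];
    num_bytes_skipped += offsets[row_id_removed + 1] - offsets[row_id_removed];
    // The fix: consecutive duplicates occupy the same source bytes, so they
    // are stepped over together.
    while (num_rows_left > 0 && row_id_removed == row_ids[num_rows_left - 1]) {
      --num_rows_left;
    }
  }
  // Prints 0: neither copy of row 4 may use the unchecked wide-copy path.
  std::cout << num_rows_left << std::endl;
  return 0;
}
```

Without the inner loop, the sketch reports 1, meaning one of the duplicated rows would still be handled by the unchecked wide-copy path over a value that ends exactly at the buffer boundary, which is the overrun described above.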
* Closes: #32570 Authored-by: zanmato Signed-off-by: Antoine Pitrou --- cpp/src/arrow/compute/light_array.cc | 7 ++++-- cpp/src/arrow/compute/light_array.h | 4 +++- cpp/src/arrow/compute/light_array_test.cc | 26 +++++++++++++++++++++++ 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/compute/light_array.cc b/cpp/src/arrow/compute/light_array.cc index 4e8b2b2d7cc3a..93a054de1957c 100644 --- a/cpp/src/arrow/compute/light_array.cc +++ b/cpp/src/arrow/compute/light_array.cc @@ -398,9 +398,12 @@ int ExecBatchBuilder::NumRowsToSkip(const std::shared_ptr& column, } else { --num_rows_left; int row_id_removed = row_ids[num_rows_left]; - const uint32_t* offsets = - reinterpret_cast(column->buffers[1]->data()); + const int32_t* offsets = column->GetValues(1); num_bytes_skipped += offsets[row_id_removed + 1] - offsets[row_id_removed]; + // Skip consecutive rows with the same id + while (num_rows_left > 0 && row_id_removed == row_ids[num_rows_left - 1]) { + --num_rows_left; + } } } diff --git a/cpp/src/arrow/compute/light_array.h b/cpp/src/arrow/compute/light_array.h index 87f6b6c76a12c..84aa86d64bb62 100644 --- a/cpp/src/arrow/compute/light_array.h +++ b/cpp/src/arrow/compute/light_array.h @@ -416,7 +416,9 @@ class ARROW_EXPORT ExecBatchBuilder { // without checking buffer bounds (useful with SIMD or fixed size memory loads // and stores). // - // The sequence of row_ids provided must be non-decreasing. + // The sequence of row_ids provided must be non-decreasing. In case of consecutive rows + // with the same row id, they are skipped all at once because they occupy the same + // space. // static int NumRowsToSkip(const std::shared_ptr& column, int num_rows, const uint16_t* row_ids, int num_tail_bytes_to_skip); diff --git a/cpp/src/arrow/compute/light_array_test.cc b/cpp/src/arrow/compute/light_array_test.cc index 4e33f7b578ea8..52121530fe91d 100644 --- a/cpp/src/arrow/compute/light_array_test.cc +++ b/cpp/src/arrow/compute/light_array_test.cc @@ -471,6 +471,32 @@ TEST(ExecBatchBuilder, AppendBatchesSomeRows) { ASSERT_EQ(0, pool->bytes_allocated()); } +TEST(ExecBatchBuilder, AppendBatchDupRows) { + std::unique_ptr owned_pool = MemoryPool::CreateDefault(); + MemoryPool* pool = owned_pool.get(); + // Case of cross-word copying for the last row, which may exceed the buffer boundary. + // This is a simplified case of GH-32570 + { + // 64-byte data fully occupying one minimal 64-byte aligned memory region. + ExecBatch batch_string = JSONToExecBatch({binary()}, R"([["123456789ABCDEF0"], + ["123456789ABCDEF0"], + ["123456789ABCDEF0"], + ["ABCDEF0"], + ["123456789"]])"); // 9-byte tail row, larger than a word. 
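+    // buffers[1] holds the int32 offsets and buffers[2] the value bytes of the
+    // binary column; the values above add up to 3 * 16 + 7 + 9 = 64 bytes, so
+    // the 9-byte tail row ends exactly at the 64-byte capacity asserted below.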
+ ASSERT_EQ(batch_string[0].array()->buffers[1]->capacity(), 64); + ASSERT_EQ(batch_string[0].array()->buffers[2]->capacity(), 64); + ExecBatchBuilder builder; + uint16_t row_ids[2] = {4, 4}; + ASSERT_OK(builder.AppendSelected(pool, batch_string, 2, row_ids, /*num_cols=*/1)); + ExecBatch built = builder.Flush(); + ExecBatch batch_string_appended = + JSONToExecBatch({binary()}, R"([["123456789"], ["123456789"]])"); + ASSERT_EQ(batch_string_appended, built); + ASSERT_NE(0, pool->bytes_allocated()); + } + ASSERT_EQ(0, pool->bytes_allocated()); +} + TEST(ExecBatchBuilder, AppendBatchesSomeCols) { std::unique_ptr owned_pool = MemoryPool::CreateDefault(); MemoryPool* pool = owned_pool.get(); From 929c40bcbded7184a5f6894db208f16975de4d37 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Fri, 22 Dec 2023 00:37:29 +0000 Subject: [PATCH 091/570] GH-39343: [C++][FS][Azure] Add client secret auth configuration (#39346) ### Rationale for this change Client is a useful Azure authentication ### What changes are included in this PR? Implement `AzureOptions::ConfigureClientSecretCredential` ### Are these changes tested? Simple unittest ### Are there any user-facing changes? Client secret auth is now supported on the Azure filesystem. * Closes: #39343 Authored-by: Thomas Newton Signed-off-by: Sutou Kouhei --- cpp/src/arrow/filesystem/azurefs.cc | 10 ++++++++++ cpp/src/arrow/filesystem/azurefs.h | 5 +++++ cpp/src/arrow/filesystem/azurefs_test.cc | 7 +++++++ 3 files changed, 22 insertions(+) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 27bdb5092a3ea..26c2761886050 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -113,6 +113,16 @@ Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_na return Status::OK(); } +Status AzureOptions::ConfigureClientSecretCredential(const std::string& account_name, + const std::string& tenant_id, + const std::string& client_id, + const std::string& client_secret) { + credential_kind_ = CredentialKind::kTokenCredential; + token_credential_ = std::make_shared( + tenant_id, client_id, client_secret); + return Status::OK(); +} + Status AzureOptions::ConfigureDefaultCredential(const std::string& account_name) { credential_kind_ = CredentialKind::kTokenCredential; token_credential_ = std::make_shared(); diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 69f6295237043..346dd349e935c 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -110,6 +110,11 @@ struct ARROW_EXPORT AzureOptions { Status ConfigureAccountKeyCredential(const std::string& account_name, const std::string& account_key); + Status ConfigureClientSecretCredential(const std::string& account_name, + const std::string& tenant_id, + const std::string& client_id, + const std::string& client_secret); + bool Equals(const AzureOptions& other) const; std::string AccountBlobUrl(const std::string& account_name) const; diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 3266c1bfda2dc..62c5ef2232045 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -271,6 +271,13 @@ class AzureHierarchicalNSEnv : public AzureEnvImpl { bool WithHierarchicalNamespace() const final { return true; } }; +TEST(AzureFileSystem, InitializeFilesystemWithClientSecretCredential) { + AzureOptions options; + ARROW_EXPECT_OK(options.ConfigureClientSecretCredential( + 
"dummy-account-name", "tenant_id", "client_id", "client_secret")); + EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); +} + TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) { AzureOptions options; ARROW_EXPECT_OK(options.ConfigureDefaultCredential("dummy-account-name")); From 51970e066e69ab01f9bdcc81219781ae07b9799b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 22 Dec 2023 02:06:50 +0100 Subject: [PATCH 092/570] GH-39006: [Python] Extract libparquet requirements out of libarrow_python.so to new libarrow_python_parquet_encryption.so (#39316) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change If I build pyarrow with everything and then I remove some of the Arrow CPP .so in order to have a minimal build I can't import pyarrow because it requires libarrow and libparquet. This is relevant in order to have a minimal build for Conda. Please see the related issue for more information. ### What changes are included in this PR? Move libarrow parquet encryption for pyarrow to its own shared object. ### Are these changes tested? I will run extensive CI with extra python archery tests. ### Are there any user-facing changes? No, and yes :) There will be a new .so on pyarrow but shouldn't be relevant in my opinion. * Closes: #39006 Lead-authored-by: Raúl Cumplido Co-authored-by: Antoine Pitrou Signed-off-by: Sutou Kouhei --- ci/scripts/python_test.sh | 2 + ci/scripts/python_wheel_unix_test.sh | 1 + ci/scripts/python_wheel_windows_test.bat | 1 + python/CMakeLists.txt | 38 ++++++++++--------- .../src/arrow/python/parquet_encryption.h | 33 +++++++++++++--- 5 files changed, 53 insertions(+), 22 deletions(-) diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh index 8d818346faa6e..341c2dd0577ef 100755 --- a/ci/scripts/python_test.sh +++ b/ci/scripts/python_test.sh @@ -45,6 +45,7 @@ export ARROW_DEBUG_MEMORY_POOL=trap : ${PYARROW_TEST_HDFS:=${ARROW_HDFS:-ON}} : ${PYARROW_TEST_ORC:=${ARROW_ORC:-ON}} : ${PYARROW_TEST_PARQUET:=${ARROW_PARQUET:-ON}} +: ${PYARROW_TEST_PARQUET_ENCRYPTION:=${PARQUET_REQUIRE_ENCRYPTION:-ON}} : ${PYARROW_TEST_S3:=${ARROW_S3:-ON}} export PYARROW_TEST_ACERO @@ -56,6 +57,7 @@ export PYARROW_TEST_GCS export PYARROW_TEST_HDFS export PYARROW_TEST_ORC export PYARROW_TEST_PARQUET +export PYARROW_TEST_PARQUET_ENCRYPTION export PYARROW_TEST_S3 # Testing PyArrow diff --git a/ci/scripts/python_wheel_unix_test.sh b/ci/scripts/python_wheel_unix_test.sh index a6cc3bb7b29b7..01250ff7ef40c 100755 --- a/ci/scripts/python_wheel_unix_test.sh +++ b/ci/scripts/python_wheel_unix_test.sh @@ -46,6 +46,7 @@ export PYARROW_TEST_HDFS=ON export PYARROW_TEST_ORC=ON export PYARROW_TEST_PANDAS=ON export PYARROW_TEST_PARQUET=ON +export PYARROW_TEST_PARQUET_ENCRYPTION=ON export PYARROW_TEST_SUBSTRAIT=${ARROW_SUBSTRAIT} export PYARROW_TEST_S3=${ARROW_S3} export PYARROW_TEST_TENSORFLOW=ON diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat index c73b0cfd1b9bd..b14bfddfb36d3 100755 --- a/ci/scripts/python_wheel_windows_test.bat +++ b/ci/scripts/python_wheel_windows_test.bat @@ -26,6 +26,7 @@ set PYARROW_TEST_GCS=ON set PYARROW_TEST_HDFS=ON set PYARROW_TEST_ORC=OFF set PYARROW_TEST_PARQUET=ON +set PYARROW_TEST_PARQUET_ENCRYPTION=ON set PYARROW_TEST_SUBSTRAIT=ON set PYARROW_TEST_S3=OFF set PYARROW_TEST_TENSORFLOW=ON diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 3f810d27271e5..2df1e67b9f4c7 100644 --- 
a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -332,22 +332,6 @@ if(PYARROW_BUILD_PARQUET OR PYARROW_BUILD_PARQUET_ENCRYPTION) find_package(Parquet REQUIRED) endif() -if(PYARROW_BUILD_PARQUET_ENCRYPTION) - if(PARQUET_REQUIRE_ENCRYPTION) - list(APPEND PYARROW_CPP_SRCS ${PYARROW_CPP_SOURCE_DIR}/parquet_encryption.cc) - if(ARROW_BUILD_SHARED) - list(APPEND PYARROW_CPP_LINK_LIBS Parquet::parquet_shared) - else() - list(APPEND PYARROW_CPP_LINK_LIBS Parquet::parquet_static) - endif() - message(STATUS "Parquet Encryption Enabled") - else() - message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON") - endif() -else() - message(STATUS "Parquet Encryption is NOT Enabled") -endif() - if(PYARROW_BUILD_HDFS) if(NOT ARROW_HDFS) message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON") @@ -391,6 +375,26 @@ install(TARGETS arrow_python LIBRARY DESTINATION . RUNTIME DESTINATION .) +set(PYARROW_CPP_ENCRYPTION_SRCS ${PYARROW_CPP_SOURCE_DIR}/parquet_encryption.cc) +if(NOT PYARROW_BUILD_PARQUET_ENCRYPTION) + message(STATUS "Parquet Encryption is NOT Enabled") +else() + if(PARQUET_REQUIRE_ENCRYPTION) + add_library(arrow_python_parquet_encryption SHARED ${PYARROW_CPP_ENCRYPTION_SRCS}) + target_link_libraries(arrow_python_parquet_encryption PUBLIC arrow_python + ${PARQUET_LINK_LIBS}) + target_compile_definitions(arrow_python_parquet_encryption + PRIVATE ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORTING) + install(TARGETS arrow_python_parquet_encryption + ARCHIVE DESTINATION . + LIBRARY DESTINATION . + RUNTIME DESTINATION .) + message(STATUS "Parquet Encryption Enabled") + else() + message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON") + endif() +endif() + set(PYARROW_CPP_FLIGHT_SRCS ${PYARROW_CPP_SOURCE_DIR}/flight.cc) if(PYARROW_BUILD_FLIGHT) if(NOT ARROW_FLIGHT) @@ -814,6 +818,6 @@ endif() if(PYARROW_BUILD_PARQUET) target_link_libraries(_parquet PRIVATE ${PARQUET_LINK_LIBS}) if(PYARROW_BUILD_PARQUET_ENCRYPTION) - target_link_libraries(_parquet_encryption PRIVATE ${PARQUET_LINK_LIBS}) + target_link_libraries(_parquet_encryption PRIVATE arrow_python_parquet_encryption) endif() endif() diff --git a/python/pyarrow/src/arrow/python/parquet_encryption.h b/python/pyarrow/src/arrow/python/parquet_encryption.h index 23ee478348ecd..a1aaa30e260f5 100644 --- a/python/pyarrow/src/arrow/python/parquet_encryption.h +++ b/python/pyarrow/src/arrow/python/parquet_encryption.h @@ -26,6 +26,27 @@ #include "parquet/encryption/kms_client.h" #include "parquet/encryption/kms_client_factory.h" +#if defined(_WIN32) || defined(__CYGWIN__) // Windows +#if defined(_MSC_VER) +#pragma warning(disable : 4251) +#else +#pragma GCC diagnostic ignored "-Wattributes" +#endif + +#ifdef ARROW_PYTHON_STATIC +#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT +#elif defined(ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORTING) +#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __declspec(dllexport) +#else +#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __declspec(dllimport) +#endif + +#else // Not Windows +#ifndef ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT +#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __attribute__((visibility("default"))) +#endif +#endif // Non-Windows + namespace arrow { namespace py { namespace parquet { @@ -33,7 +54,7 @@ namespace encryption { /// \brief A table of function pointers for calling from C++ into /// Python. 
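// The classes below are exported with ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT
// (defined above) so that they remain visible to users of the new
// libarrow_python_parquet_encryption shared library instead of libarrow_python.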
-class ARROW_PYTHON_EXPORT PyKmsClientVtable { +class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientVtable { public: std::function @@ -44,7 +65,8 @@ class ARROW_PYTHON_EXPORT PyKmsClientVtable { }; /// \brief A helper for KmsClient implementation in Python. -class ARROW_PYTHON_EXPORT PyKmsClient : public ::parquet::encryption::KmsClient { +class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClient + : public ::parquet::encryption::KmsClient { public: PyKmsClient(PyObject* handler, PyKmsClientVtable vtable); ~PyKmsClient() override; @@ -62,7 +84,7 @@ class ARROW_PYTHON_EXPORT PyKmsClient : public ::parquet::encryption::KmsClient /// \brief A table of function pointers for calling from C++ into /// Python. -class ARROW_PYTHON_EXPORT PyKmsClientFactoryVtable { +class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientFactoryVtable { public: std::function> SafeGetFileEncryptionProperties( From cd5a1bd259a95eb9342569fb01d41a5924aec30f Mon Sep 17 00:00:00 2001 From: Ravjot Brar <83892020+ravjotbrar@users.noreply.github.com> Date: Fri, 22 Dec 2023 07:03:32 -0800 Subject: [PATCH 093/570] GH-39014: [Java] Add default truststore along with KeychainStore when on Mac system (#39235) ### Rationale for this change As described in #39014, when using the system TrustStore on Mac, the certificates returned do not include Root CAs trusted by the system. This change adds the default KeyStore instance along with the KeyChainStore to include trusted Root CAs. The reason we add the default KeyStore instance is because there is no easy way to get the certificates from the System Roots keychain. ### What changes are included in this PR? I've updated ClientAuthenticationUtils to get the default KeyStore instance when the operating system is macOS and have updated the tests to include this change. ### Are these changes tested? See changes made in ClientAuthenticationUtilsTest.java. ### Are there any user-facing changes? No * Closes: #39014 Authored-by: Ravjot Brar Signed-off-by: David Li --- .../utils/ClientAuthenticationUtils.java | 21 ++++++---- .../utils/ClientAuthenticationUtilsTest.java | 42 +++++++++++++++++-- 2 files changed, 51 insertions(+), 12 deletions(-) diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtils.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtils.java index d50dc385a62e1..ffb0048181c7c 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtils.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtils.java @@ -115,6 +115,16 @@ static KeyStore getKeyStoreInstance(String instance) return keyStore; } + @VisibleForTesting + static KeyStore getDefaultKeyStoreInstance(String password) + throws KeyStoreException, CertificateException, NoSuchAlgorithmException, IOException { + try (InputStream fileInputStream = getKeystoreInputStream()) { + KeyStore keyStore = KeyStore.getInstance(KeyStore.getDefaultType()); + keyStore.load(fileInputStream, password == null ? 
null : password.toCharArray()); + return keyStore; + } + } + static String getOperatingSystem() { return System.getProperty("os.name"); } @@ -156,16 +166,9 @@ public static InputStream getCertificateInputStreamFromSystem(String password) t keyStoreList.add(getKeyStoreInstance("Windows-MY")); } else if (isMac()) { keyStoreList.add(getKeyStoreInstance("KeychainStore")); + keyStoreList.add(getDefaultKeyStoreInstance(password)); } else { - try (InputStream fileInputStream = getKeystoreInputStream()) { - KeyStore keyStore = KeyStore.getInstance(KeyStore.getDefaultType()); - if (password == null) { - keyStore.load(fileInputStream, null); - } else { - keyStore.load(fileInputStream, password.toCharArray()); - } - keyStoreList.add(keyStore); - } + keyStoreList.add(getDefaultKeyStoreInstance(password)); } return getCertificatesInputStream(keyStoreList); diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java index 27bba64587367..b7977462e9c01 100644 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java +++ b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java @@ -77,6 +77,33 @@ public void testGetKeyStoreInstance() throws IOException, } } + @Test + public void testGetDefaultKeyStoreInstancePassword() throws IOException, + KeyStoreException, CertificateException, NoSuchAlgorithmException { + try (MockedStatic keyStoreMockedStatic = Mockito.mockStatic(KeyStore.class)) { + + keyStoreMockedStatic + .when(() -> ClientAuthenticationUtils.getDefaultKeyStoreInstance("changeit")) + .thenReturn(keyStoreMock); + KeyStore receiveKeyStore = ClientAuthenticationUtils.getDefaultKeyStoreInstance("changeit"); + Assert.assertEquals(receiveKeyStore, keyStoreMock); + } + } + + @Test + public void testGetDefaultKeyStoreInstanceNoPassword() throws IOException, + KeyStoreException, CertificateException, NoSuchAlgorithmException { + try (MockedStatic keyStoreMockedStatic = Mockito.mockStatic(KeyStore.class)) { + + keyStoreMockedStatic + .when(() -> ClientAuthenticationUtils.getDefaultKeyStoreInstance(null)) + .thenReturn(keyStoreMock); + KeyStore receiveKeyStore = ClientAuthenticationUtils.getDefaultKeyStoreInstance(null); + Assert.assertEquals(receiveKeyStore, keyStoreMock); + } + } + + @Test public void testGetCertificateInputStreamFromMacSystem() throws IOException, KeyStoreException, CertificateException, NoSuchAlgorithmException { @@ -90,11 +117,18 @@ public void testGetCertificateInputStreamFromMacSystem() throws IOException, keyStoreMockedStatic.when(() -> ClientAuthenticationUtils .getKeyStoreInstance("KeychainStore")) .thenReturn(keyStoreMock); + keyStoreMockedStatic.when(() -> ClientAuthenticationUtils + .getDefaultKeyStoreInstance("changeit")) + .thenReturn(keyStoreMock); + clientAuthenticationUtilsMockedStatic + .when(ClientAuthenticationUtils::getKeystoreInputStream) + .thenCallRealMethod(); + keyStoreMockedStatic.when(KeyStore::getDefaultType).thenCallRealMethod(); keyStoreMockedStatic.when(() -> ClientAuthenticationUtils .getCertificatesInputStream(Mockito.any())) .thenReturn(mock); - InputStream inputStream = ClientAuthenticationUtils.getCertificateInputStreamFromSystem("test"); + InputStream inputStream = 
ClientAuthenticationUtils.getCertificateInputStreamFromSystem("changeit"); Assert.assertEquals(inputStream, mock); } } @@ -136,9 +170,11 @@ public void testGetCertificateInputStreamFromLinuxSystem() throws IOException, setOperatingSystemMock(clientAuthenticationUtilsMockedStatic, false, false); keyStoreMockedStatic.when(() -> ClientAuthenticationUtils - .getCertificatesInputStream(Mockito.any())) + .getCertificatesInputStream(Mockito.any())) .thenReturn(mock); - + keyStoreMockedStatic.when(() -> ClientAuthenticationUtils + .getDefaultKeyStoreInstance(Mockito.any())) + .thenReturn(keyStoreMock); clientAuthenticationUtilsMockedStatic .when(ClientAuthenticationUtils::getKeystoreInputStream) .thenCallRealMethod(); From a4a3d3f4825eb025657121e70c9d86e8d6ecff35 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Fri, 22 Dec 2023 23:17:58 +0800 Subject: [PATCH 094/570] GH-39265: [Java] Make it run well with the netty newest version 4.1.104 (#39266) ### Describe the enhancement requested When I used `netty arrow memory 14.0.1` and `netty 4.1.104.Final` in Spark, a compilation error occurred. After PR https://github.com/netty/netty/pull/13613, `PoolArena` no longer extends `SizeClasses` but instead uses it as one of its fields. In order to ensure that `netty arrow memory 14.0.1` works well with the `netty 4.1.104.Final` version, I suggest making similar modifications here. 1. Compilation errors are as follows: https://github.com/panbingkun/spark/actions/runs/7237466030/job/19717162391 2. Some bugs have been fixed in `netty 4.1.104.Final`; see the release notes: 4.1.104.Final release note: https://netty.io/news/2023/12/15/4-1-104-Final.html 4.1.103.Final release note: https://netty.io/news/2023/12/13/4-1-103-Final.html 4.1.101.Final release note: https://netty.io/news/2023/11/09/4-1-101-Final.html ### Component(s) Java * Closes: #39265 Authored-by: panbingkun Signed-off-by: David Li --- .../main/java/io/netty/buffer/PooledByteBufAllocatorL.java | 7 ++----- java/pom.xml | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/java/memory/memory-netty/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java b/java/memory/memory-netty/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java index 06c6669cfd162..ba9aba353c351 100644 --- a/java/memory/memory-netty/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java +++ b/java/memory/memory-netty/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java @@ -71,7 +71,7 @@ public UnsafeDirectLittleEndian allocate(long size) { } public int getChunkSize() { - return allocator.chunkSize; + return allocator.chunkSize(); } public long getHugeBufferSize() { @@ -137,7 +137,6 @@ private class InnerAllocator extends PooledByteBufAllocator { private final PoolArena[] directArenas; private final MemoryStatusThread statusThread; - private final int chunkSize; public InnerAllocator() { super(true); @@ -150,8 +149,6 @@ public InnerAllocator() { throw new RuntimeException("Failure while initializing allocator. Unable to retrieve direct arenas field.", e); } - this.chunkSize = directArenas[0].chunkSize; - if (memoryLogger.isTraceEnabled()) { statusThread = new MemoryStatusThread(this); statusThread.start(); @@ -166,7 +163,7 @@ private UnsafeDirectLittleEndian newDirectBufferL(int initialCapacity, int maxCa if (directArena != null) { - if (initialCapacity > directArena.chunkSize) { + if (initialCapacity > chunkSize()) { // This is beyond chunk size so we'll allocate separately.
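// Netty 4.1.104 (netty/netty#13613) changed PoolArena to no longer extend
// SizeClasses, so directArena.chunkSize no longer compiles here; the
// allocator-level chunkSize() accessor is used instead.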
ByteBuf buf = UnpooledByteBufAllocator.DEFAULT.directBuffer(initialCapacity, maxCapacity); diff --git a/java/pom.xml b/java/pom.xml index 75e0946f10811..4cca5e7245f0f 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -33,7 +33,7 @@ 5.10.1 2.0.9 32.1.3-jre - 4.1.100.Final + 4.1.104.Final 1.60.0 3.23.1 2.16.0 From 87971df049c09671bae8a207fe2b29704fe21e8d Mon Sep 17 00:00:00 2001 From: John Garland Date: Sat, 23 Dec 2023 04:27:20 +1100 Subject: [PATCH 095/570] GH-39335: [C#] Support creating FlightClient with Grpc.Core.Channel (#39348) as well as Grpc.Net.Client.GrpcChannel by changing our constructor arg to Grpc.Core.ChannelBase which both classes inherit from. ### Rationale for this change ### What changes are included in this PR? Changing the constructor of C#'s Flight Client to take in a ChannelBase which allows for multiple implementations of gRPC channels to be passed in. ### Are these changes tested? Existing tests already cover the use but have also manually tested in a separate app ( ### Are there any user-facing changes? No as we're just changing the constructor to take in a parent/base class instead. * Closes: #39335 Authored-by: John Garland Signed-off-by: Curt Hagenlocher --- csharp/src/Apache.Arrow.Flight/Client/FlightClient.cs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/csharp/src/Apache.Arrow.Flight/Client/FlightClient.cs b/csharp/src/Apache.Arrow.Flight/Client/FlightClient.cs index 5dc0d1b434b6d..a7c459935c240 100644 --- a/csharp/src/Apache.Arrow.Flight/Client/FlightClient.cs +++ b/csharp/src/Apache.Arrow.Flight/Client/FlightClient.cs @@ -16,10 +16,8 @@ using System.Threading.Tasks; using Apache.Arrow.Flight.Internal; using Apache.Arrow.Flight.Protocol; -using Apache.Arrow.Flight.Server; using Apache.Arrow.Flight.Server.Internal; using Grpc.Core; -using Grpc.Net.Client; namespace Apache.Arrow.Flight.Client { @@ -29,7 +27,7 @@ public class FlightClient private readonly FlightService.FlightServiceClient _client; - public FlightClient(GrpcChannel grpcChannel) + public FlightClient(ChannelBase grpcChannel) { _client = new FlightService.FlightServiceClient(grpcChannel); } From 7b71156d99557168d46292c010f82b812947ffb8 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 22 Dec 2023 17:02:31 -0400 Subject: [PATCH 096/570] GH-39138: [R] Fix implicit conversion warnings (#39250) ### Rationale for this change We have failing CRAN checks because this warning occurs on one check machine. ### What changes are included in this PR? Implicit integer casts are made explicit and/or variable declarations were fixed so that fewer implicit integer casts were performed. Fully solving the warnings also requires https://github.com/r-lib/cpp11/pull/349 since some errors occur in those headers. ### Are these changes tested? This particular test we can't do on CI because the MacOS runner we have doesn't have a new enough `clang` to support the requisite `-W` flags. I tested this locally by adding `PKG_CXXFLAGS=-Wconversion -Wno-sign-conversion -Wsign-compare -Werror` to `Makevars.in`. ### Are there any user-facing changes? 
No * Closes: #39138 Authored-by: Dewey Dunnington Signed-off-by: Dewey Dunnington --- r/src/altrep.cpp | 56 +++++++++++++++++++---------- r/src/array.cpp | 18 ++++++---- r/src/array_to_vector.cpp | 14 ++++---- r/src/arraydata.cpp | 12 +++---- r/src/arrowExports.cpp | 76 +++++++++++++++++++-------------------- r/src/arrow_cpp11.h | 14 +++++++- r/src/arrow_types.h | 4 +-- r/src/chunkedarray.cpp | 5 ++- r/src/compression.cpp | 2 +- r/src/compute.cpp | 15 ++++---- r/src/dataset.cpp | 4 +-- r/src/datatype.cpp | 2 +- r/src/io.cpp | 11 ++++-- r/src/message.cpp | 4 +-- r/src/r_to_arrow.cpp | 18 +++++----- r/src/recordbatch.cpp | 14 ++++---- r/src/schema.cpp | 4 +-- r/src/table.cpp | 16 ++++----- 18 files changed, 165 insertions(+), 124 deletions(-) diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index 9745393d01bbc..bdaac0a9ce5d2 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -275,7 +275,8 @@ struct AltrepVectorPrimitive : public AltrepVectorBase(R_ExternalPtrAddr(R_altrep_data1(alt))); auto resolve = altrep_data->locate(i); - const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index); + const auto& array = + altrep_data->chunked_array()->chunk(static_cast(resolve.chunk_index)); auto j = resolve.index_in_chunk; return array->IsNull(j) ? cpp11::na() @@ -466,10 +467,10 @@ struct AltrepFactor : public AltrepVectorBase { std::unique_ptr unifier_ = ValueOrStop(DictionaryUnifier::Make(arr_type.value_type())); - size_t n_arrays = chunked_array->num_chunks(); + int n_arrays = chunked_array->num_chunks(); BufferVector arrays_transpose(n_arrays); - for (size_t i = 0; i < n_arrays; i++) { + for (int i = 0; i < n_arrays; i++) { const auto& dict_i = *internal::checked_cast(*chunked_array->chunk(i)) .dictionary(); @@ -559,17 +560,14 @@ struct AltrepFactor : public AltrepVectorBase { return dup; } - // The value at position i - static int Elt(SEXP alt, R_xlen_t i) { - if (Base::IsMaterialized(alt)) { - return INTEGER_ELT(Representation(alt), i); - } - + // The value at position i as an int64_t (to make bounds checking less verbose) + static int64_t Elt64(SEXP alt, R_xlen_t i) { auto altrep_data = reinterpret_cast(R_ExternalPtrAddr(R_altrep_data1(alt))); auto resolve = altrep_data->locate(i); - const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index); + const auto& array = + altrep_data->chunked_array()->chunk(static_cast(resolve.chunk_index)); auto j = resolve.index_in_chunk; if (!array->IsNull(j)) { @@ -578,7 +576,7 @@ struct AltrepFactor : public AltrepVectorBase { if (WasUnified(alt)) { const auto* transpose_data = reinterpret_cast( - GetArrayTransposed(alt, resolve.chunk_index)->data()); + GetArrayTransposed(alt, static_cast(resolve.chunk_index))->data()); switch (indices->type_id()) { case Type::UINT8: @@ -617,7 +615,7 @@ struct AltrepFactor : public AltrepVectorBase { case Type::INT64: return indices->data()->GetValues(1)[j] + 1; case Type::UINT64: - return indices->data()->GetValues(1)[j] + 1; + return static_cast(indices->data()->GetValues(1)[j] + 1); default: break; } @@ -628,6 +626,18 @@ struct AltrepFactor : public AltrepVectorBase { return NA_INTEGER; } + // The value at position i as an int (which R needs because this is a factor) + static int Elt(SEXP alt, R_xlen_t i) { + if (Base::IsMaterialized(alt)) { + return INTEGER_ELT(Representation(alt), i); + } + + int64_t elt64 = Elt64(alt, i); + ARROW_R_DCHECK(elt64 == NA_INTEGER || elt64 >= 1); + ARROW_R_DCHECK(elt64 <= std::numeric_limits::max()); + return static_cast(elt64); + } + static R_xlen_t 
Get_region(SEXP alt, R_xlen_t start, R_xlen_t n, int* buf) { // If we have data2, we can just copy the region into buf // using the standard Get_region for this R type @@ -667,7 +677,7 @@ struct AltrepFactor : public AltrepVectorBase { // using the transpose data for this chunk const auto* transpose_data = reinterpret_cast(GetArrayTransposed(alt, j)->data()); - auto transpose = [transpose_data](int x) { return transpose_data[x]; }; + auto transpose = [transpose_data](int64_t x) { return transpose_data[x]; }; GetRegionDispatch(array, indices, transpose, out); @@ -677,7 +687,7 @@ struct AltrepFactor : public AltrepVectorBase { } else { // simpler case, identity transpose - auto transpose = [](int x) { return x; }; + auto transpose = [](int64_t x) { return static_cast(x); }; int* out = buf; for (const auto& array : slice->chunks()) { @@ -718,7 +728,13 @@ struct AltrepFactor : public AltrepVectorBase { VisitArraySpanInline( *array->data(), - /*valid_func=*/[&](index_type index) { *out++ = transpose(index) + 1; }, + /*valid_func=*/ + [&](index_type index) { + int64_t transposed = transpose(index) + 1; + ARROW_R_DCHECK(transposed >= 1); + ARROW_R_DCHECK(transposed <= std::numeric_limits::max()); + *out++ = static_cast(transposed); + }, /*null_func=*/[&]() { *out++ = cpp11::na(); }); } @@ -765,7 +781,8 @@ struct AltrepVectorString : public AltrepVectorBase> { bool no_nul = std::find(view_.begin(), view_.end(), '\0') == view_.end(); if (no_nul) { - return Rf_mkCharLenCE(view_.data(), view_.size(), CE_UTF8); + ARROW_R_DCHECK(view_.size() <= std::numeric_limits::max()); + return Rf_mkCharLenCE(view_.data(), static_cast(view_.size()), CE_UTF8); } else if (strip_out_nuls_) { return ConvertStripNul(); } else { @@ -802,7 +819,9 @@ struct AltrepVectorString : public AltrepVectorBase> { } nul_was_stripped_ = true; - return Rf_mkCharLenCE(stripped_string_.data(), stripped_len, CE_UTF8); + ARROW_R_DCHECK(stripped_len <= std::numeric_limits::max()); + return Rf_mkCharLenCE(stripped_string_.data(), static_cast(stripped_len), + CE_UTF8); } bool nul_was_stripped() const { return nul_was_stripped_; } @@ -847,7 +866,8 @@ struct AltrepVectorString : public AltrepVectorBase> { auto altrep_data = reinterpret_cast(R_ExternalPtrAddr(R_altrep_data1(alt))); auto resolve = altrep_data->locate(i); - const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index); + const auto& array = + altrep_data->chunked_array()->chunk(static_cast(resolve.chunk_index)); auto j = resolve.index_in_chunk; SEXP s = NA_STRING; diff --git a/r/src/array.cpp b/r/src/array.cpp index ae76c01a94910..38406e494d67b 100644 --- a/r/src/array.cpp +++ b/r/src/array.cpp @@ -92,7 +92,7 @@ std::shared_ptr Array__Slice2(const std::shared_ptr& return array->Slice(offset, length); } -void arrow::r::validate_index(int i, int len) { +void arrow::r::validate_index(int64_t i, int64_t len) { if (i == NA_INTEGER) { cpp11::stop("'i' cannot be NA"); } @@ -119,10 +119,14 @@ r_vec_size Array__length(const std::shared_ptr& x) { } // [[arrow::export]] -int Array__offset(const std::shared_ptr& x) { return x->offset(); } +r_vec_size Array__offset(const std::shared_ptr& x) { + return r_vec_size(x->offset()); +} // [[arrow::export]] -int Array__null_count(const std::shared_ptr& x) { return x->null_count(); } +r_vec_size Array__null_count(const std::shared_ptr& x) { + return r_vec_size(x->null_count()); +} // [[arrow::export]] std::shared_ptr Array__type(const std::shared_ptr& x) { @@ -263,9 +267,9 @@ r_vec_size LargeListArray__value_length( } // 
[[arrow::export]] -r_vec_size FixedSizeListArray__value_length( +int FixedSizeListArray__value_length( const std::shared_ptr& array, int64_t i) { - return r_vec_size(array->value_length(i)); + return array->value_length(i); } // [[arrow::export]] @@ -294,10 +298,10 @@ cpp11::writable::integers ListArray__raw_value_offsets( } // [[arrow::export]] -cpp11::writable::integers LargeListArray__raw_value_offsets( +cpp11::writable::doubles LargeListArray__raw_value_offsets( const std::shared_ptr& array) { auto offsets = array->raw_value_offsets(); - return cpp11::writable::integers(offsets, offsets + array->length()); + return cpp11::writable::doubles(offsets, offsets + array->length()); } // [[arrow::export]] diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index bf026d2723a1a..2f0508eb7a47a 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -375,7 +375,7 @@ struct Converter_String : public Converter { private: static SEXP r_string_from_view(std::string_view view) { - return Rf_mkCharLenCE(view.data(), view.size(), CE_UTF8); + return Rf_mkCharLenCE(view.data(), static_cast(view.size()), CE_UTF8); } static SEXP r_string_from_view_strip_nul(std::string_view view, @@ -576,10 +576,10 @@ class Converter_Dictionary : public Converter { const auto& arr_type = checked_cast(*chunked_array->type()); unifier_ = ValueOrStop(DictionaryUnifier::Make(arr_type.value_type())); - size_t n_arrays = chunked_array->num_chunks(); + int n_arrays = chunked_array->num_chunks(); arrays_transpose_.resize(n_arrays); - for (size_t i = 0; i < n_arrays; i++) { + for (int i = 0; i < n_arrays; i++) { const auto& dict_i = *checked_cast(*chunked_array->chunk(i)).dictionary(); StopIfNotOk(unifier_->Unify(dict_i, &arrays_transpose_[i])); @@ -748,7 +748,7 @@ class Converter_Struct : public Converter { auto colnames = arrow::r::to_r_strings( type->fields(), [](const std::shared_ptr& field) { return field->name(); }); - out.attr(symbols::row_names) = arrow::r::short_row_names(n); + out.attr(symbols::row_names) = arrow::r::short_row_names(static_cast(n)); out.attr(R_NamesSymbol) = colnames; out.attr(R_ClassSymbol) = arrow::r::data::classes_tbl_df; @@ -756,7 +756,7 @@ class Converter_Struct : public Converter { } Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const { - int nf = converters.size(); + int nf = static_cast(converters.size()); for (int i = 0; i < nf; i++) { SEXP data_i = VECTOR_ELT(data, i); @@ -771,7 +771,7 @@ class Converter_Struct : public Converter { Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, R_xlen_t start, R_xlen_t n, size_t chunk_index) const { auto struct_array = checked_cast(array.get()); - int nf = converters.size(); + int nf = static_cast(converters.size()); // Flatten() deals with merging of nulls auto arrays = ValueOrStop(struct_array->Flatten(gc_memory_pool())); for (int i = 0; i < nf; i++) { @@ -1384,7 +1384,7 @@ cpp11::writable::list to_data_frame(const std::shared_ptr& data, tbl.attr(R_NamesSymbol) = names; tbl.attr(R_ClassSymbol) = arrow::r::data::classes_tbl_df; - tbl.attr(R_RowNamesSymbol) = arrow::r::short_row_names(nr); + tbl.attr(R_RowNamesSymbol) = arrow::r::short_row_names(static_cast(nr)); return tbl; } diff --git a/r/src/arraydata.cpp b/r/src/arraydata.cpp index cdab38f1147aa..d879e807323af 100644 --- a/r/src/arraydata.cpp +++ b/r/src/arraydata.cpp @@ -26,18 +26,18 @@ std::shared_ptr ArrayData__get_type( } // [[arrow::export]] -int ArrayData__get_length(const std::shared_ptr& x) { - return x->length; +r_vec_size 
ArrayData__get_length(const std::shared_ptr& x) { + return r_vec_size(x->length); } // [[arrow::export]] -int ArrayData__get_null_count(const std::shared_ptr& x) { - return x->null_count; +r_vec_size ArrayData__get_null_count(const std::shared_ptr& x) { + return r_vec_size(x->null_count); } // [[arrow::export]] -int ArrayData__get_offset(const std::shared_ptr& x) { - return x->offset; +r_vec_size ArrayData__get_offset(const std::shared_ptr& x) { + return r_vec_size(x->offset); } // [[arrow::export]] diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 790207efce1d2..75e0f27b4002e 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -110,7 +110,7 @@ BEGIN_CPP11 END_CPP11 } // array.cpp -int Array__offset(const std::shared_ptr& x); +r_vec_size Array__offset(const std::shared_ptr& x); extern "C" SEXP _arrow_Array__offset(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -118,7 +118,7 @@ BEGIN_CPP11 END_CPP11 } // array.cpp -int Array__null_count(const std::shared_ptr& x); +r_vec_size Array__null_count(const std::shared_ptr& x); extern "C" SEXP _arrow_Array__null_count(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -315,7 +315,7 @@ BEGIN_CPP11 END_CPP11 } // array.cpp -r_vec_size FixedSizeListArray__value_length(const std::shared_ptr& array, int64_t i); +int FixedSizeListArray__value_length(const std::shared_ptr& array, int64_t i); extern "C" SEXP _arrow_FixedSizeListArray__value_length(SEXP array_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type array(array_sexp); @@ -359,7 +359,7 @@ BEGIN_CPP11 END_CPP11 } // array.cpp -cpp11::writable::integers LargeListArray__raw_value_offsets(const std::shared_ptr& array); +cpp11::writable::doubles LargeListArray__raw_value_offsets(const std::shared_ptr& array); extern "C" SEXP _arrow_LargeListArray__raw_value_offsets(SEXP array_sexp){ BEGIN_CPP11 arrow::r::Input&>::type array(array_sexp); @@ -467,7 +467,7 @@ BEGIN_CPP11 END_CPP11 } // arraydata.cpp -int ArrayData__get_length(const std::shared_ptr& x); +r_vec_size ArrayData__get_length(const std::shared_ptr& x); extern "C" SEXP _arrow_ArrayData__get_length(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -475,7 +475,7 @@ BEGIN_CPP11 END_CPP11 } // arraydata.cpp -int ArrayData__get_null_count(const std::shared_ptr& x); +r_vec_size ArrayData__get_null_count(const std::shared_ptr& x); extern "C" SEXP _arrow_ArrayData__get_null_count(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -483,7 +483,7 @@ BEGIN_CPP11 END_CPP11 } // arraydata.cpp -int ArrayData__get_offset(const std::shared_ptr& x); +r_vec_size ArrayData__get_offset(const std::shared_ptr& x); extern "C" SEXP _arrow_ArrayData__get_offset(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -765,7 +765,7 @@ BEGIN_CPP11 END_CPP11 } // chunkedarray.cpp -r_vec_size ChunkedArray__num_chunks(const std::shared_ptr& chunked_array); +int ChunkedArray__num_chunks(const std::shared_ptr& chunked_array); extern "C" SEXP _arrow_ChunkedArray__num_chunks(SEXP chunked_array_sexp){ BEGIN_CPP11 arrow::r::Input&>::type chunked_array(chunked_array_sexp); @@ -869,11 +869,11 @@ BEGIN_CPP11 END_CPP11 } // compression.cpp -std::shared_ptr util___Codec__Create(arrow::Compression::type codec, R_xlen_t compression_level); +std::shared_ptr util___Codec__Create(arrow::Compression::type codec, int compression_level); extern "C" SEXP _arrow_util___Codec__Create(SEXP codec_sexp, SEXP compression_level_sexp){ BEGIN_CPP11 arrow::r::Input::type codec(codec_sexp); - 
arrow::r::Input::type compression_level(compression_level_sexp); + arrow::r::Input::type compression_level(compression_level_sexp); return cpp11::as_sexp(util___Codec__Create(codec, compression_level)); END_CPP11 } @@ -2024,14 +2024,14 @@ extern "C" SEXP _arrow_dataset___JsonFragmentScanOptions__Make(SEXP parse_option // dataset.cpp #if defined(ARROW_R_WITH_DATASET) -std::shared_ptr dataset___ParquetFragmentScanOptions__Make(bool use_buffered_stream, int64_t buffer_size, bool pre_buffer, int64_t thrift_string_size_limit, int64_t thrift_container_size_limit); +std::shared_ptr dataset___ParquetFragmentScanOptions__Make(bool use_buffered_stream, int64_t buffer_size, bool pre_buffer, int32_t thrift_string_size_limit, int32_t thrift_container_size_limit); extern "C" SEXP _arrow_dataset___ParquetFragmentScanOptions__Make(SEXP use_buffered_stream_sexp, SEXP buffer_size_sexp, SEXP pre_buffer_sexp, SEXP thrift_string_size_limit_sexp, SEXP thrift_container_size_limit_sexp){ BEGIN_CPP11 arrow::r::Input::type use_buffered_stream(use_buffered_stream_sexp); arrow::r::Input::type buffer_size(buffer_size_sexp); arrow::r::Input::type pre_buffer(pre_buffer_sexp); - arrow::r::Input::type thrift_string_size_limit(thrift_string_size_limit_sexp); - arrow::r::Input::type thrift_container_size_limit(thrift_container_size_limit_sexp); + arrow::r::Input::type thrift_string_size_limit(thrift_string_size_limit_sexp); + arrow::r::Input::type thrift_container_size_limit(thrift_container_size_limit_sexp); return cpp11::as_sexp(dataset___ParquetFragmentScanOptions__Make(use_buffered_stream, buffer_size, pre_buffer, thrift_string_size_limit, thrift_container_size_limit)); END_CPP11 } @@ -2567,10 +2567,10 @@ BEGIN_CPP11 END_CPP11 } // datatype.cpp -std::shared_ptr FixedSizeBinary__initialize(R_xlen_t byte_width); +std::shared_ptr FixedSizeBinary__initialize(int32_t byte_width); extern "C" SEXP _arrow_FixedSizeBinary__initialize(SEXP byte_width_sexp){ BEGIN_CPP11 - arrow::r::Input::type byte_width(byte_width_sexp); + arrow::r::Input::type byte_width(byte_width_sexp); return cpp11::as_sexp(FixedSizeBinary__initialize(byte_width)); END_CPP11 } @@ -3976,7 +3976,7 @@ BEGIN_CPP11 END_CPP11 } // message.cpp -r_vec_size ipc___Message__Verify(const std::unique_ptr& message); +bool ipc___Message__Verify(const std::unique_ptr& message); extern "C" SEXP _arrow_ipc___Message__Verify(SEXP message_sexp){ BEGIN_CPP11 arrow::r::Input&>::type message(message_sexp); @@ -4684,7 +4684,7 @@ BEGIN_CPP11 END_CPP11 } // recordbatch.cpp -r_vec_size RecordBatch__num_columns(const std::shared_ptr& x); +int RecordBatch__num_columns(const std::shared_ptr& x); extern "C" SEXP _arrow_RecordBatch__num_columns(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -4734,11 +4734,11 @@ BEGIN_CPP11 END_CPP11 } // recordbatch.cpp -std::shared_ptr RecordBatch__column(const std::shared_ptr& batch, R_xlen_t i); +std::shared_ptr RecordBatch__column(const std::shared_ptr& batch, int i); extern "C" SEXP _arrow_RecordBatch__column(SEXP batch_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(RecordBatch__column(batch, i)); END_CPP11 } @@ -4771,42 +4771,42 @@ BEGIN_CPP11 END_CPP11 } // recordbatch.cpp -std::shared_ptr RecordBatch__AddColumn(const std::shared_ptr& batch, R_xlen_t i, const std::shared_ptr& field, const std::shared_ptr& column); +std::shared_ptr RecordBatch__AddColumn(const std::shared_ptr& batch, int i, const std::shared_ptr& 
field, const std::shared_ptr& column); extern "C" SEXP _arrow_RecordBatch__AddColumn(SEXP batch_sexp, SEXP i_sexp, SEXP field_sexp, SEXP column_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); arrow::r::Input&>::type field(field_sexp); arrow::r::Input&>::type column(column_sexp); return cpp11::as_sexp(RecordBatch__AddColumn(batch, i, field, column)); END_CPP11 } // recordbatch.cpp -std::shared_ptr RecordBatch__SetColumn(const std::shared_ptr& batch, R_xlen_t i, const std::shared_ptr& field, const std::shared_ptr& column); +std::shared_ptr RecordBatch__SetColumn(const std::shared_ptr& batch, int i, const std::shared_ptr& field, const std::shared_ptr& column); extern "C" SEXP _arrow_RecordBatch__SetColumn(SEXP batch_sexp, SEXP i_sexp, SEXP field_sexp, SEXP column_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); arrow::r::Input&>::type field(field_sexp); arrow::r::Input&>::type column(column_sexp); return cpp11::as_sexp(RecordBatch__SetColumn(batch, i, field, column)); END_CPP11 } // recordbatch.cpp -std::shared_ptr RecordBatch__RemoveColumn(const std::shared_ptr& batch, R_xlen_t i); +std::shared_ptr RecordBatch__RemoveColumn(const std::shared_ptr& batch, int i); extern "C" SEXP _arrow_RecordBatch__RemoveColumn(SEXP batch_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(RecordBatch__RemoveColumn(batch, i)); END_CPP11 } // recordbatch.cpp -std::string RecordBatch__column_name(const std::shared_ptr& batch, R_xlen_t i); +std::string RecordBatch__column_name(const std::shared_ptr& batch, int i); extern "C" SEXP _arrow_RecordBatch__column_name(SEXP batch_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(RecordBatch__column_name(batch, i)); END_CPP11 } @@ -5346,7 +5346,7 @@ BEGIN_CPP11 END_CPP11 } // table.cpp -r_vec_size Table__num_columns(const std::shared_ptr& x); +int Table__num_columns(const std::shared_ptr& x); extern "C" SEXP _arrow_Table__num_columns(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -5379,20 +5379,20 @@ BEGIN_CPP11 END_CPP11 } // table.cpp -std::shared_ptr Table__column(const std::shared_ptr& table, R_xlen_t i); +std::shared_ptr Table__column(const std::shared_ptr& table, int i); extern "C" SEXP _arrow_Table__column(SEXP table_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(Table__column(table, i)); END_CPP11 } // table.cpp -std::shared_ptr Table__field(const std::shared_ptr& table, R_xlen_t i); +std::shared_ptr Table__field(const std::shared_ptr& table, int i); extern "C" SEXP _arrow_Table__field(SEXP table_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(Table__field(table, i)); END_CPP11 } @@ -5476,31 +5476,31 @@ BEGIN_CPP11 END_CPP11 } // table.cpp -std::shared_ptr Table__RemoveColumn(const std::shared_ptr& table, R_xlen_t i); +std::shared_ptr Table__RemoveColumn(const std::shared_ptr& table, int i); extern "C" SEXP _arrow_Table__RemoveColumn(SEXP table_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); - 
arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(Table__RemoveColumn(table, i)); END_CPP11 } // table.cpp -std::shared_ptr Table__AddColumn(const std::shared_ptr& table, R_xlen_t i, const std::shared_ptr& field, const std::shared_ptr& column); +std::shared_ptr Table__AddColumn(const std::shared_ptr& table, int i, const std::shared_ptr& field, const std::shared_ptr& column); extern "C" SEXP _arrow_Table__AddColumn(SEXP table_sexp, SEXP i_sexp, SEXP field_sexp, SEXP column_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); arrow::r::Input&>::type field(field_sexp); arrow::r::Input&>::type column(column_sexp); return cpp11::as_sexp(Table__AddColumn(table, i, field, column)); END_CPP11 } // table.cpp -std::shared_ptr Table__SetColumn(const std::shared_ptr& table, R_xlen_t i, const std::shared_ptr& field, const std::shared_ptr& column); +std::shared_ptr Table__SetColumn(const std::shared_ptr& table, int i, const std::shared_ptr& field, const std::shared_ptr& column); extern "C" SEXP _arrow_Table__SetColumn(SEXP table_sexp, SEXP i_sexp, SEXP field_sexp, SEXP column_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); arrow::r::Input&>::type field(field_sexp); arrow::r::Input&>::type column(column_sexp); return cpp11::as_sexp(Table__SetColumn(table, i, field, column)); diff --git a/r/src/arrow_cpp11.h b/r/src/arrow_cpp11.h index d8c4b719d1d3e..ab60586628164 100644 --- a/r/src/arrow_cpp11.h +++ b/r/src/arrow_cpp11.h @@ -27,6 +27,18 @@ #include "./nameof.h" +// Simple dcheck that doesn't use assert (i.e., won't crash the R session) +// Condition this on our own debug flag to avoid this ending up in any CRAN +// checks. +#if defined(ARROW_R_DEBUG) +#define ARROW_R_DCHECK(EXPR) \ + do { \ + if (!(EXPR)) Rf_error("Failed DCHECK: %s evaluated to false", #EXPR); \ + } while (false) +#else +#define ARROW_R_DCHECK(EXPR) +#endif + // borrowed from enc package // because R does not make these macros available (i.e. 
from Defn.h) #define UTF8_MASK (1 << 3) @@ -465,7 +477,7 @@ inline SEXP as_sexp(r_vec_size size) { if (x > std::numeric_limits::max()) { return Rf_ScalarReal(x); } else { - return Rf_ScalarInteger(x); + return Rf_ScalarInteger(static_cast(x)); } } diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index fadc39c75fc06..05c8f6062dabb 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -189,13 +189,13 @@ void validate_slice_offset(R_xlen_t offset, int64_t len); void validate_slice_length(R_xlen_t length, int64_t available); -void validate_index(int i, int len); +void validate_index(int64_t i, int64_t len); template void TraverseDots(cpp11::list dots, int num_fields, Lambda lambda) { cpp11::strings names(dots.attr(R_NamesSymbol)); - for (R_xlen_t i = 0, j = 0; j < num_fields; i++) { + for (int i = 0, j = 0; j < num_fields; i++) { auto name_i = names[i]; if (name_i.size() == 0) { diff --git a/r/src/chunkedarray.cpp b/r/src/chunkedarray.cpp index 36884bb531b62..258013fc4da57 100644 --- a/r/src/chunkedarray.cpp +++ b/r/src/chunkedarray.cpp @@ -34,9 +34,8 @@ r_vec_size ChunkedArray__null_count( } // [[arrow::export]] -r_vec_size ChunkedArray__num_chunks( - const std::shared_ptr& chunked_array) { - return r_vec_size(chunked_array->num_chunks()); +int ChunkedArray__num_chunks(const std::shared_ptr& chunked_array) { + return chunked_array->num_chunks(); } // [[arrow::export]] diff --git a/r/src/compression.cpp b/r/src/compression.cpp index 148c6e14002f5..bc893afd8d28b 100644 --- a/r/src/compression.cpp +++ b/r/src/compression.cpp @@ -22,7 +22,7 @@ // [[arrow::export]] std::shared_ptr util___Codec__Create(arrow::Compression::type codec, - R_xlen_t compression_level) { + int compression_level) { return ValueOrStop(arrow::util::Codec::Create(codec, compression_level)); } diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 87d1326ed3419..bd97e30005ca3 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -241,10 +241,10 @@ std::shared_ptr make_compute_options( interpolation); } if (!Rf_isNull(options["min_count"])) { - out->min_count = cpp11::as_cpp(options["min_count"]); + out->min_count = cpp11::as_cpp(options["min_count"]); } if (!Rf_isNull(options["skip_nulls"])) { - out->skip_nulls = cpp11::as_cpp(options["skip_nulls"]); + out->skip_nulls = cpp11::as_cpp(options["skip_nulls"]); } return out; } @@ -479,9 +479,9 @@ std::shared_ptr make_compute_options( func_name == "hash_stddev") { using Options = arrow::compute::VarianceOptions; auto out = std::make_shared(); - out->ddof = cpp11::as_cpp(options["ddof"]); + out->ddof = cpp11::as_cpp(options["ddof"]); if (!Rf_isNull(options["min_count"])) { - out->min_count = cpp11::as_cpp(options["min_count"]); + out->min_count = cpp11::as_cpp(options["min_count"]); } if (!Rf_isNull(options["skip_nulls"])) { out->skip_nulls = cpp11::as_cpp(options["skip_nulls"]); @@ -683,7 +683,7 @@ arrow::Status CallRScalarUDF(arrow::compute::KernelContext* context, } } - cpp11::sexp batch_length_sexp = cpp11::as_sexp(span.length); + cpp11::sexp batch_length_sexp = cpp11::as_sexp(static_cast(span.length)); std::shared_ptr output_type = result->type()->GetSharedPtr(); cpp11::sexp output_type_sexp = cpp11::to_r6(output_type); @@ -738,8 +738,7 @@ void RegisterScalarUDF(std::string name, cpp11::list func_sexp) { // Compute the Arity from the list of input kernels. We don't currently handle // variable numbers of arguments in a user-defined function. 
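// num_fields() returns an int, so n_args is declared as int below to avoid an
// implicit narrowing conversion (part of the -Wconversion cleanup).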
- int64_t n_args = - cpp11::as_cpp>(in_type_r[0])->num_fields(); + int n_args = cpp11::as_cpp>(in_type_r[0])->num_fields(); for (R_xlen_t i = 1; i < n_kernels; i++) { auto in_types = cpp11::as_cpp>(in_type_r[i]); if (in_types->num_fields() != n_args) { @@ -767,7 +766,7 @@ void RegisterScalarUDF(std::string name, cpp11::list func_sexp) { cpp11::sexp out_type_func = out_type_r[i]; std::vector compute_in_types(in_types->num_fields()); - for (int64_t j = 0; j < in_types->num_fields(); j++) { + for (int j = 0; j < in_types->num_fields(); j++) { compute_in_types[j] = arrow::compute::InputType(in_types->field(j)->type()); } diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp index 83c430fb634d3..e53fc03bdb413 100644 --- a/r/src/dataset.cpp +++ b/r/src/dataset.cpp @@ -343,8 +343,8 @@ std::shared_ptr dataset___JsonFragmentScanOptions__ std::shared_ptr dataset___ParquetFragmentScanOptions__Make(bool use_buffered_stream, int64_t buffer_size, bool pre_buffer, - int64_t thrift_string_size_limit, - int64_t thrift_container_size_limit) { + int32_t thrift_string_size_limit, + int32_t thrift_container_size_limit) { auto options = std::make_shared(); if (use_buffered_stream) { options->reader_properties->enable_buffered_stream(); diff --git a/r/src/datatype.cpp b/r/src/datatype.cpp index f19ba92527157..2f2b89d658d91 100644 --- a/r/src/datatype.cpp +++ b/r/src/datatype.cpp @@ -201,7 +201,7 @@ std::shared_ptr DayTimeInterval__initialize() { } // [[arrow::export]] -std::shared_ptr FixedSizeBinary__initialize(R_xlen_t byte_width) { +std::shared_ptr FixedSizeBinary__initialize(int32_t byte_width) { if (byte_width == NA_INTEGER) { cpp11::stop("'byte_width' cannot be NA"); } diff --git a/r/src/io.cpp b/r/src/io.cpp index 321b1b17febc3..4d5ee31794ae8 100644 --- a/r/src/io.cpp +++ b/r/src/io.cpp @@ -253,11 +253,16 @@ class RConnectionFileInterface : public virtual arrow::io::FileInterface { return arrow::Status::IOError("R connection is closed"); } + if (nbytes > std::numeric_limits::max()) { + return arrow::Status::Invalid( + "Can't read more than INT_MAX bytes from an R connection"); + } + return SafeCallIntoR( [&] { cpp11::function read_bin = cpp11::package("base")["readBin"]; cpp11::writable::raws ptype((R_xlen_t)0); - cpp11::integers n = cpp11::as_sexp(nbytes); + cpp11::integers n = cpp11::as_sexp(static_cast(nbytes)); cpp11::sexp result = read_bin(connection_sexp_, ptype, n); @@ -512,8 +517,8 @@ struct ReencodeUTF8TransformFunctionWrapper { // UTF-16, and UTF-32. while (in_bytes_left > 0) { // Make enough place in the output to hopefully consume all of the input. 
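// The reserved size below is computed with an explicit cast so the integer
// conversion of in_bytes_left * kOversizeFactor is intentional rather than implicit.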
- RETURN_NOT_OK( - builder.Reserve(std::max(in_bytes_left * kOversizeFactor, 4))); + RETURN_NOT_OK(builder.Reserve( + std::max(static_cast(in_bytes_left * kOversizeFactor), 4))); out_buf = builder.mutable_data() + builder.length(); out_bytes_left = builder.capacity() - builder.length(); diff --git a/r/src/message.cpp b/r/src/message.cpp index d9832ddc22a74..3f21873fea3b2 100644 --- a/r/src/message.cpp +++ b/r/src/message.cpp @@ -39,8 +39,8 @@ std::shared_ptr ipc___Message__body( } // [[arrow::export]] -r_vec_size ipc___Message__Verify(const std::unique_ptr& message) { - return r_vec_size(message->Verify()); +bool ipc___Message__Verify(const std::unique_ptr& message) { + return message->Verify(); } // [[arrow::export]] diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index d9bf848e24292..d2db11e14a787 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -335,7 +335,7 @@ struct RConvert { template static enable_if_integer> Convert(Type*, From from) { - return CIntFromRScalarImpl(from); + return CIntFromRScalarImpl(static_cast(from)); } // ---- convert R integer types to double @@ -461,7 +461,7 @@ class RPrimitiveConverter< if (std::is_same::value) { auto append_value = [this](r_value_type value) { - this->primitive_builder_->UnsafeAppend(value); + this->primitive_builder_->UnsafeAppend(static_cast(value)); return Status::OK(); }; return VisitVector(it, size, append_null, append_value); @@ -595,19 +595,21 @@ class RPrimitiveConverter::value>> return VisitVector(it, size, append_null, append_value); } - static int FromRDate(const Date32Type*, int from) { return from; } + static int FromRDate(const Date32Type*, double from) { return static_cast(from); } - static int64_t FromRDate(const Date64Type*, int from) { + static int64_t FromRDate(const Date64Type*, double from) { constexpr int64_t kMilliSecondsPerDay = 86400000; - return from * kMilliSecondsPerDay; + return static_cast(from * kMilliSecondsPerDay); } static int FromPosixct(const Date32Type*, double from) { constexpr int64_t kSecondsPerDay = 86400; - return from / kSecondsPerDay; + return static_cast(from / kSecondsPerDay); } - static int64_t FromPosixct(const Date64Type*, double from) { return from * 1000; } + static int64_t FromPosixct(const Date64Type*, double from) { + return static_cast(from * 1000); + } }; int64_t get_TimeUnit_multiplier(TimeUnit::type unit) { @@ -1081,7 +1083,7 @@ class RListConverter : public ListConverter { auto append_value = [this](SEXP value) { // TODO: if we decide that this can be run concurrently // we'll have to do vec_size() upfront - int n = arrow::r::vec_size(value); + R_xlen_t n = arrow::r::vec_size(value); RETURN_NOT_OK(this->list_builder_->ValidateOverflow(n)); RETURN_NOT_OK(this->list_builder_->Append()); diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index aca3a74fd81df..bf88e98ed1026 100644 --- a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -27,8 +27,8 @@ #include // [[arrow::export]] -r_vec_size RecordBatch__num_columns(const std::shared_ptr& x) { - return r_vec_size(x->num_columns()); +int RecordBatch__num_columns(const std::shared_ptr& x) { + return x->num_columns(); } // [[arrow::export]] @@ -80,7 +80,7 @@ cpp11::list RecordBatch__columns(const std::shared_ptr& batc // [[arrow::export]] std::shared_ptr RecordBatch__column( - const std::shared_ptr& batch, R_xlen_t i) { + const std::shared_ptr& batch, int i) { arrow::r::validate_index(i, batch->num_columns()); return batch->column(i); } @@ -106,7 +106,7 @@ bool RecordBatch__Equals(const std::shared_ptr& 
self, // [[arrow::export]] std::shared_ptr RecordBatch__AddColumn( - const std::shared_ptr& batch, R_xlen_t i, + const std::shared_ptr& batch, int i, const std::shared_ptr& field, const std::shared_ptr& column) { return ValueOrStop(batch->AddColumn(i, field, column)); @@ -114,7 +114,7 @@ std::shared_ptr RecordBatch__AddColumn( // [[arrow::export]] std::shared_ptr RecordBatch__SetColumn( - const std::shared_ptr& batch, R_xlen_t i, + const std::shared_ptr& batch, int i, const std::shared_ptr& field, const std::shared_ptr& column) { return ValueOrStop(batch->SetColumn(i, field, column)); @@ -122,14 +122,14 @@ std::shared_ptr RecordBatch__SetColumn( // [[arrow::export]] std::shared_ptr RecordBatch__RemoveColumn( - const std::shared_ptr& batch, R_xlen_t i) { + const std::shared_ptr& batch, int i) { arrow::r::validate_index(i, batch->num_columns()); return ValueOrStop(batch->RemoveColumn(i)); } // [[arrow::export]] std::string RecordBatch__column_name(const std::shared_ptr& batch, - R_xlen_t i) { + int i) { arrow::r::validate_index(i, batch->num_columns()); return batch->column_name(i); } diff --git a/r/src/schema.cpp b/r/src/schema.cpp index cf959707305a7..41d3d38d2eda3 100644 --- a/r/src/schema.cpp +++ b/r/src/schema.cpp @@ -29,14 +29,14 @@ std::shared_ptr Schema__from_fields( // [[arrow::export]] std::shared_ptr Schema__from_list(cpp11::list field_list) { - int n = field_list.size(); + R_xlen_t n = field_list.size(); bool nullable = true; cpp11::strings names(field_list.attr(R_NamesSymbol)); std::vector> fields(n); - for (int i = 0; i < n; i++) { + for (R_xlen_t i = 0; i < n; i++) { fields[i] = arrow::field( names[i], cpp11::as_cpp>(field_list[i]), nullable); diff --git a/r/src/table.cpp b/r/src/table.cpp index 04537000f5d48..04a8c7caf24fd 100644 --- a/r/src/table.cpp +++ b/r/src/table.cpp @@ -23,8 +23,8 @@ #include // [[arrow::export]] -r_vec_size Table__num_columns(const std::shared_ptr& x) { - return r_vec_size(x->num_columns()); +int Table__num_columns(const std::shared_ptr& x) { + return x->num_columns(); } // [[arrow::export]] @@ -49,14 +49,14 @@ std::shared_ptr Table__ReplaceSchemaMetadata( // [[arrow::export]] std::shared_ptr Table__column( - const std::shared_ptr& table, R_xlen_t i) { + const std::shared_ptr& table, int i) { arrow::r::validate_index(i, table->num_columns()); return table->column(i); } // [[arrow::export]] std::shared_ptr Table__field(const std::shared_ptr& table, - R_xlen_t i) { + int i) { arrow::r::validate_index(i, table->num_columns()); return table->field(i); } @@ -123,13 +123,13 @@ std::shared_ptr Table__GetColumnByName( // [[arrow::export]] std::shared_ptr Table__RemoveColumn( - const std::shared_ptr& table, R_xlen_t i) { + const std::shared_ptr& table, int i) { return ValueOrStop(table->RemoveColumn(i)); } // [[arrow::export]] std::shared_ptr Table__AddColumn( - const std::shared_ptr& table, R_xlen_t i, + const std::shared_ptr& table, int i, const std::shared_ptr& field, const std::shared_ptr& column) { return ValueOrStop(table->AddColumn(i, field, column)); @@ -137,7 +137,7 @@ std::shared_ptr Table__AddColumn( // [[arrow::export]] std::shared_ptr Table__SetColumn( - const std::shared_ptr& table, R_xlen_t i, + const std::shared_ptr& table, int i, const std::shared_ptr& field, const std::shared_ptr& column) { return ValueOrStop(table->SetColumn(i, field, column)); @@ -241,7 +241,7 @@ arrow::Status AddMetadataFromDots(SEXP lst, int num_fields, // Remove metadata for ExtensionType columns, because these have their own mechanism for // preserving R type 
information - for (R_xlen_t i = 0; i < schema->num_fields(); i++) { + for (int i = 0; i < schema->num_fields(); i++) { if (schema->field(i)->type()->id() == Type::EXTENSION) { metadata_columns[i] = R_NilValue; } From d51954415882423584f2a95b0897aa4d073a4e1c Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Sat, 23 Dec 2023 15:03:47 +0000 Subject: [PATCH 097/570] GH-39320: [C++][FS][Azure] Add managed identity auth configuration (#39321) ### Rationale for this change Workload identity is a useful Azure authentication method. Also I failed to set the account_name correctly for a bunch of auths (I think this got lost in a rebase then I copy pasted the broken code). ### What changes are included in this PR? - Make filesystem initialisation fail if `account_name_.empty()`. This prevents the account name configuration bug we had. Also added a test asserting that filesystem initialization fails in this case. - Remove account name configuration on all auth configs, in favour of setting in separately from the auth configuration. - Implement `AzureOptions::ConfigureManagedIdentityCredential` ### Are these changes tested? Added a simple test initialising a filesystem using `ConfigureManagedIdentityCredential`. This is not the most comprehensive test but its the same as what we agreed on for https://github.com/apache/arrow/pull/39263. ### Are there any user-facing changes? Managed identity authentication is now supported. * Closes: #39320 Authored-by: Thomas Newton Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/filesystem/azurefs.cc | 38 ++++++++++++++++-------- cpp/src/arrow/filesystem/azurefs.h | 16 +++++----- cpp/src/arrow/filesystem/azurefs_test.cc | 34 +++++++++++++++++---- 3 files changed, 62 insertions(+), 26 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 26c2761886050..21350a490411a 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -58,7 +58,7 @@ bool AzureOptions::Equals(const AzureOptions& other) const { blob_storage_scheme == other.blob_storage_scheme && dfs_storage_scheme == other.dfs_storage_scheme && default_metadata == other.default_metadata && - account_name_ == other.account_name_ && + account_name == other.account_name && credential_kind_ == other.credential_kind_; if (!equals) { return false; @@ -104,17 +104,17 @@ std::string AzureOptions::AccountDfsUrl(const std::string& account_name) const { return BuildBaseUrl(dfs_storage_scheme, dfs_storage_authority, account_name); } -Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name, - const std::string& account_key) { +Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_key) { credential_kind_ = CredentialKind::kStorageSharedKeyCredential; - account_name_ = account_name; + if (account_name.empty()) { + return Status::Invalid("AzureOptions doesn't contain a valid account name"); + } storage_shared_key_credential_ = std::make_shared(account_name, account_key); return Status::OK(); } -Status AzureOptions::ConfigureClientSecretCredential(const std::string& account_name, - const std::string& tenant_id, +Status AzureOptions::ConfigureClientSecretCredential(const std::string& tenant_id, const std::string& client_id, const std::string& client_secret) { credential_kind_ = CredentialKind::kTokenCredential; @@ -123,14 +123,20 @@ Status AzureOptions::ConfigureClientSecretCredential(const std::string& account_ return Status::OK(); } -Status 
AzureOptions::ConfigureDefaultCredential(const std::string& account_name) { +Status AzureOptions::ConfigureDefaultCredential() { credential_kind_ = CredentialKind::kTokenCredential; token_credential_ = std::make_shared(); return Status::OK(); } -Status AzureOptions::ConfigureWorkloadIdentityCredential( - const std::string& account_name) { +Status AzureOptions::ConfigureManagedIdentityCredential(const std::string& client_id) { + credential_kind_ = CredentialKind::kTokenCredential; + token_credential_ = + std::make_shared(client_id); + return Status::OK(); +} + +Status AzureOptions::ConfigureWorkloadIdentityCredential() { credential_kind_ = CredentialKind::kTokenCredential; token_credential_ = std::make_shared(); return Status::OK(); @@ -138,14 +144,17 @@ Status AzureOptions::ConfigureWorkloadIdentityCredential( Result> AzureOptions::MakeBlobServiceClient() const { + if (account_name.empty()) { + return Status::Invalid("AzureOptions doesn't contain a valid account name"); + } switch (credential_kind_) { case CredentialKind::kAnonymous: break; case CredentialKind::kTokenCredential: - return std::make_unique(AccountBlobUrl(account_name_), + return std::make_unique(AccountBlobUrl(account_name), token_credential_); case CredentialKind::kStorageSharedKeyCredential: - return std::make_unique(AccountBlobUrl(account_name_), + return std::make_unique(AccountBlobUrl(account_name), storage_shared_key_credential_); } return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); @@ -153,15 +162,18 @@ Result> AzureOptions::MakeBlobServiceC Result> AzureOptions::MakeDataLakeServiceClient() const { + if (account_name.empty()) { + return Status::Invalid("AzureOptions doesn't contain a valid account name"); + } switch (credential_kind_) { case CredentialKind::kAnonymous: break; case CredentialKind::kTokenCredential: return std::make_unique( - AccountDfsUrl(account_name_), token_credential_); + AccountDfsUrl(account_name), token_credential_); case CredentialKind::kStorageSharedKeyCredential: return std::make_unique( - AccountDfsUrl(account_name_), storage_shared_key_credential_); + AccountDfsUrl(account_name), storage_shared_key_credential_); } return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); } diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 346dd349e935c..78e0a8148c616 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -48,6 +48,9 @@ class TestAzureFileSystem; /// Options for the AzureFileSystem implementation. struct ARROW_EXPORT AzureOptions { + /// \brief account name of the Azure Storage account. + std::string account_name; + /// \brief hostname[:port] of the Azure Blob Storage Service. 
/// /// If the hostname is a relative domain name (one that starts with a '.'), then storage @@ -94,7 +97,6 @@ struct ARROW_EXPORT AzureOptions { kStorageSharedKeyCredential, } credential_kind_ = CredentialKind::kAnonymous; - std::string account_name_; std::shared_ptr token_credential_; std::shared_ptr storage_shared_key_credential_; @@ -103,15 +105,15 @@ struct ARROW_EXPORT AzureOptions { AzureOptions(); ~AzureOptions(); - Status ConfigureDefaultCredential(const std::string& account_name); + Status ConfigureDefaultCredential(); + + Status ConfigureManagedIdentityCredential(const std::string& client_id = std::string()); - Status ConfigureWorkloadIdentityCredential(const std::string& account_name); + Status ConfigureWorkloadIdentityCredential(); - Status ConfigureAccountKeyCredential(const std::string& account_name, - const std::string& account_key); + Status ConfigureAccountKeyCredential(const std::string& account_key); - Status ConfigureClientSecretCredential(const std::string& account_name, - const std::string& tenant_id, + Status ConfigureClientSecretCredential(const std::string& tenant_id, const std::string& client_id, const std::string& client_secret); diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 62c5ef2232045..f6af9f722dbac 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -271,22 +271,44 @@ class AzureHierarchicalNSEnv : public AzureEnvImpl { bool WithHierarchicalNamespace() const final { return true; } }; +TEST(AzureFileSystem, InitializingFilesystemWithoutAccountNameFails) { + AzureOptions options; + ASSERT_RAISES(Invalid, options.ConfigureAccountKeyCredential("account_key")); + + ARROW_EXPECT_OK( + options.ConfigureClientSecretCredential("tenant_id", "client_id", "client_secret")); + ASSERT_RAISES(Invalid, AzureFileSystem::Make(options)); +} + TEST(AzureFileSystem, InitializeFilesystemWithClientSecretCredential) { AzureOptions options; - ARROW_EXPECT_OK(options.ConfigureClientSecretCredential( - "dummy-account-name", "tenant_id", "client_id", "client_secret")); + options.account_name = "dummy-account-name"; + ARROW_EXPECT_OK( + options.ConfigureClientSecretCredential("tenant_id", "client_id", "client_secret")); EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); } TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) { AzureOptions options; - ARROW_EXPECT_OK(options.ConfigureDefaultCredential("dummy-account-name")); + options.account_name = "dummy-account-name"; + ARROW_EXPECT_OK(options.ConfigureDefaultCredential()); EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); } +TEST(AzureFileSystem, InitializeFilesystemWithManagedIdentityCredential) { + AzureOptions options; + options.account_name = "dummy-account-name"; + ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential()); + EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); + + ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential("specific-client-id")); + EXPECT_OK_AND_ASSIGN(fs, AzureFileSystem::Make(options)); +} + TEST(AzureFileSystem, InitializeFilesystemWithWorkloadIdentityCredential) { AzureOptions options; - ARROW_EXPECT_OK(options.ConfigureWorkloadIdentityCredential("dummy-account-name")); + options.account_name = "dummy-account-name"; + ARROW_EXPECT_OK(options.ConfigureWorkloadIdentityCredential()); EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); } @@ -383,6 +405,7 @@ class TestAzureFileSystem : public ::testing::Test { static Result 
MakeOptions(BaseAzureEnv* env) { AzureOptions options; + options.account_name = env->account_name(); switch (env->backend()) { case AzureBackend::kAzurite: options.blob_storage_authority = "127.0.0.1:10000"; @@ -394,8 +417,7 @@ class TestAzureFileSystem : public ::testing::Test { // Use the default values break; } - ARROW_EXPECT_OK( - options.ConfigureAccountKeyCredential(env->account_name(), env->account_key())); + ARROW_EXPECT_OK(options.ConfigureAccountKeyCredential(env->account_key())); return options; } From ec41209ea02bdb410bc7e049cb3100afedf4ba2f Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Sat, 23 Dec 2023 23:50:39 +0800 Subject: [PATCH 098/570] GH-37055: [C++] Optimize hash kernels for Dictionary ChunkedArrays (#38394) ### Rationale for this change When merging dictionaries across chunks, the hash kernels unnecessarily unify the existing dictionary, dragging down the performance. ### What changes are included in this PR? Reuse the dictionary unifier across chunks. ### Are these changes tested? Yes, with a new benchmark for dictionary chunked arrays. ### Are there any user-facing changes? No. * Closes: #37055 Lead-authored-by: Jin Shang Co-authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/compute/kernels/vector_hash.cc | 55 +++++++++++++------ .../compute/kernels/vector_hash_benchmark.cc | 36 ++++++++++++ 2 files changed, 74 insertions(+), 17 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_hash.cc b/cpp/src/arrow/compute/kernels/vector_hash.cc index 65e59d1a2eb14..800deba3a5ed2 100644 --- a/cpp/src/arrow/compute/kernels/vector_hash.cc +++ b/cpp/src/arrow/compute/kernels/vector_hash.cc @@ -26,17 +26,20 @@ #include "arrow/array/concatenate.h" #include "arrow/array/dict_internal.h" #include "arrow/array/util.h" +#include "arrow/buffer.h" #include "arrow/compute/api_vector.h" #include "arrow/compute/cast.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/result.h" #include "arrow/util/hashing.h" +#include "arrow/util/int_util.h" #include "arrow/util/unreachable.h" namespace arrow { using internal::DictionaryTraits; using internal::HashTraits; +using internal::TransposeInts; namespace compute { namespace internal { @@ -448,9 +451,9 @@ class DictionaryHashKernel : public HashKernel { Status Append(const ArraySpan& arr) override { auto arr_dict = arr.dictionary().ToArray(); - if (!dictionary_) { - dictionary_ = arr_dict; - } else if (!dictionary_->Equals(*arr_dict)) { + if (!first_dictionary_) { + first_dictionary_ = arr_dict; + } else if (!first_dictionary_->Equals(*arr_dict)) { // NOTE: This approach computes a new dictionary unification per chunk. // This is in effect O(n*k) where n is the total chunked array length and // k is the number of chunks (therefore O(n**2) if chunks have a fixed size). @@ -458,21 +461,23 @@ class DictionaryHashKernel : public HashKernel { // A better approach may be to run the kernel over each individual chunk, // and then hash-aggregate all results (for example sum-group-by for // the "value_counts" kernel). 
- auto out_dict_type = dictionary_->type(); + if (dictionary_unifier_ == nullptr) { + ARROW_ASSIGN_OR_RAISE(dictionary_unifier_, + DictionaryUnifier::Make(first_dictionary_->type())); + RETURN_NOT_OK(dictionary_unifier_->Unify(*first_dictionary_)); + } + auto out_dict_type = first_dictionary_->type(); std::shared_ptr transpose_map; - std::shared_ptr out_dict; - ARROW_ASSIGN_OR_RAISE(auto unifier, DictionaryUnifier::Make(out_dict_type)); - ARROW_CHECK_OK(unifier->Unify(*dictionary_)); - ARROW_CHECK_OK(unifier->Unify(*arr_dict, &transpose_map)); - ARROW_CHECK_OK(unifier->GetResult(&out_dict_type, &out_dict)); + RETURN_NOT_OK(dictionary_unifier_->Unify(*arr_dict, &transpose_map)); - dictionary_ = out_dict; auto transpose = reinterpret_cast(transpose_map->data()); - auto in_dict_array = arr.ToArray(); + auto in_array = arr.ToArray(); + const auto& in_dict_array = + arrow::internal::checked_cast(*in_array); ARROW_ASSIGN_OR_RAISE( - auto tmp, arrow::internal::checked_cast(*in_dict_array) - .Transpose(arr.type->GetSharedPtr(), out_dict, transpose)); + auto tmp, in_dict_array.Transpose(arr.type->GetSharedPtr(), + in_dict_array.dictionary(), transpose)); return indices_kernel_->Append(*tmp->data()); } @@ -495,12 +500,27 @@ class DictionaryHashKernel : public HashKernel { return dictionary_value_type_; } - std::shared_ptr dictionary() const { return dictionary_; } + /// This can't be called more than once because DictionaryUnifier::GetResult() + /// can't be called more than once and produce the same output. + Result> dictionary() const { + if (!first_dictionary_) { // Append was never called + return nullptr; + } + if (!dictionary_unifier_) { // Append was called only once + return first_dictionary_; + } + + auto out_dict_type = first_dictionary_->type(); + std::shared_ptr out_dict; + RETURN_NOT_OK(dictionary_unifier_->GetResult(&out_dict_type, &out_dict)); + return out_dict; + } private: std::unique_ptr indices_kernel_; - std::shared_ptr dictionary_; + std::shared_ptr first_dictionary_; std::shared_ptr dictionary_value_type_; + std::unique_ptr dictionary_unifier_; }; // ---------------------------------------------------------------------- @@ -630,8 +650,9 @@ Status ValueCountsFinalize(KernelContext* ctx, std::vector* out) { // hence have no dictionary. 
Result> EnsureHashDictionary(KernelContext* ctx, DictionaryHashKernel* hash) { - if (hash->dictionary()) { - return hash->dictionary()->data(); + ARROW_ASSIGN_OR_RAISE(auto dict, hash->dictionary()); + if (dict) { + return dict->data(); } ARROW_ASSIGN_OR_RAISE(auto null, MakeArrayOfNull(hash->dictionary_value_type(), /*length=*/0, ctx->memory_pool())); diff --git a/cpp/src/arrow/compute/kernels/vector_hash_benchmark.cc b/cpp/src/arrow/compute/kernels/vector_hash_benchmark.cc index e9548e133aa00..472f50db8cf92 100644 --- a/cpp/src/arrow/compute/kernels/vector_hash_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/vector_hash_benchmark.cc @@ -25,6 +25,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/testing/util.h" +#include "arrow/util/logging.h" #include "arrow/compute/api.h" @@ -226,6 +227,33 @@ static void UniqueString100bytes(benchmark::State& state) { BenchUnique(state, HashParams{general_bench_cases[state.range(0)], 100}); } +template +void BenchValueCountsDictionaryChunks(benchmark::State& state, const ParamType& params) { + std::shared_ptr arr; + params.GenerateTestData(&arr); + // chunk arr to 100 slices + std::vector> chunks; + const int64_t chunk_size = arr->length() / 100; + for (int64_t i = 0; i < 100; ++i) { + auto slice = arr->Slice(i * chunk_size, chunk_size); + auto datum = DictionaryEncode(slice).ValueOrDie(); + ARROW_CHECK(datum.is_array()); + chunks.push_back(datum.make_array()); + } + auto chunked_array = std::make_shared(chunks); + + while (state.KeepRunning()) { + ABORT_NOT_OK(ValueCounts(chunked_array).status()); + } + params.SetMetadata(state); +} + +static void ValueCountsDictionaryChunks(benchmark::State& state) { + // Dictionary of byte strings with 10 bytes each + BenchValueCountsDictionaryChunks( + state, HashParams{general_bench_cases[state.range(0)], 10}); +} + void HashSetArgs(benchmark::internal::Benchmark* bench) { for (int i = 0; i < static_cast(general_bench_cases.size()); ++i) { bench->Arg(i); @@ -239,6 +267,14 @@ BENCHMARK(UniqueInt64)->Apply(HashSetArgs); BENCHMARK(UniqueString10bytes)->Apply(HashSetArgs); BENCHMARK(UniqueString100bytes)->Apply(HashSetArgs); +void DictionaryChunksHashSetArgs(benchmark::internal::Benchmark* bench) { + for (int i = 0; i < static_cast(general_bench_cases.size()); ++i) { + bench->Arg(i); + } +} + +BENCHMARK(ValueCountsDictionaryChunks)->Apply(DictionaryChunksHashSetArgs); + void UInt8SetArgs(benchmark::internal::Benchmark* bench) { for (int i = 0; i < static_cast(uint8_bench_cases.size()); ++i) { bench->Arg(i); From 90f7ecab559870dc862d34b5ac323c77c7050353 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 25 Dec 2023 05:23:17 -0500 Subject: [PATCH 099/570] GH-39017: [JS] Add `typeId` as attribute (#39018) ### Rationale for this change Support reconstructing `DataType` after `postMessage`. ### What changes are included in this PR? Make `typeId` an attribute, not a getter. ### Are these changes tested? Passes all existing tests. ### Are there any user-facing changes? 
No * Closes: #39017 --- js/src/type.ts | 70 ++++++++++++++++++++------------------------------ 1 file changed, 28 insertions(+), 42 deletions(-) diff --git a/js/src/type.ts b/js/src/type.ts index dea5301aed355..ae3aefa025999 100644 --- a/js/src/type.ts +++ b/js/src/type.ts @@ -79,7 +79,11 @@ export abstract class DataTypeType.NONE; } + declare public readonly typeId: TType; + + constructor(typeId: TType) { + this.typeId = typeId; + } protected static [Symbol.toStringTag] = ((proto: DataType) => { (proto).children = null; @@ -93,8 +97,10 @@ export abstract class DataType { TArray: void; TValue: null } /** @ignore */ export class Null extends DataType { + constructor() { + super(Type.Null); + } public toString() { return `Null`; } - public get typeId() { return Type.Null as Type.Null; } protected static [Symbol.toStringTag] = ((proto: Null) => proto[Symbol.toStringTag] = 'Null')(Null.prototype); } @@ -119,9 +125,8 @@ interface Int_ extends DataType { TArray: IType[T]['TA class Int_ extends DataType { constructor(public readonly isSigned: IType[T]['isSigned'], public readonly bitWidth: IType[T]['bitWidth']) { - super(); + super(Type.Int as T); } - public get typeId() { return Type.Int as T; } public get ArrayType() { switch (this.bitWidth) { case 8: return this.isSigned ? Int8Array : Uint8Array; @@ -206,9 +211,8 @@ export interface Float extends DataType { TArray: /** @ignore */ export class Float extends DataType { constructor(public readonly precision: Precision) { - super(); + super(Type.Float as T); } - public get typeId() { return Type.Float as T; } public get ArrayType(): TypedArrayConstructor { switch (this.precision) { case Precision.HALF: return Uint16Array; @@ -241,9 +245,8 @@ export interface Binary extends DataType { TArray: Uint8Array; TOff /** @ignore */ export class Binary extends DataType { constructor() { - super(); + super(Type.Binary); } - public get typeId() { return Type.Binary as Type.Binary; } public toString() { return `Binary`; } protected static [Symbol.toStringTag] = ((proto: Binary) => { (proto).ArrayType = Uint8Array; @@ -256,9 +259,8 @@ export interface LargeBinary extends DataType { TArray: Uint8A /** @ignore */ export class LargeBinary extends DataType { constructor() { - super(); + super(Type.LargeBinary); } - public get typeId() { return Type.LargeBinary as Type.LargeBinary; } public toString() { return `LargeBinary`; } protected static [Symbol.toStringTag] = ((proto: LargeBinary) => { (proto).ArrayType = Uint8Array; @@ -272,9 +274,8 @@ export interface Utf8 extends DataType { TArray: Uint8Array; TOffsetA /** @ignore */ export class Utf8 extends DataType { constructor() { - super(); + super(Type.Utf8); } - public get typeId() { return Type.Utf8 as Type.Utf8; } public toString() { return `Utf8`; } protected static [Symbol.toStringTag] = ((proto: Utf8) => { (proto).ArrayType = Uint8Array; @@ -287,9 +288,8 @@ export interface LargeUtf8 extends DataType { TArray: Uint8Array /** @ignore */ export class LargeUtf8 extends DataType { constructor() { - super(); + super(Type.LargeUtf8); } - public get typeId() { return Type.LargeUtf8 as Type.LargeUtf8; } public toString() { return `LargeUtf8`; } protected static [Symbol.toStringTag] = ((proto: LargeUtf8) => { (proto).ArrayType = Uint8Array; @@ -303,9 +303,8 @@ export interface Bool extends DataType { TArray: Uint8Array; TValue: /** @ignore */ export class Bool extends DataType { constructor() { - super(); + super(Type.Bool); } - public get typeId() { return Type.Bool as Type.Bool; } public toString() { return `Bool`; } 
protected static [Symbol.toStringTag] = ((proto: Bool) => { (proto).ArrayType = Uint8Array; @@ -320,9 +319,8 @@ export class Decimal extends DataType { constructor(public readonly scale: number, public readonly precision: number, public readonly bitWidth: number = 128) { - super(); + super(Type.Decimal); } - public get typeId() { return Type.Decimal as Type.Decimal; } public toString() { return `Decimal[${this.precision}e${this.scale > 0 ? `+` : ``}${this.scale}]`; } protected static [Symbol.toStringTag] = ((proto: Decimal) => { (proto).scale = null; @@ -339,9 +337,8 @@ export interface Date_ extends DataType { TArray: In /** @ignore */ export class Date_ extends DataType { constructor(public readonly unit: DateUnit) { - super(); + super(Type.Date as T); } - public get typeId() { return Type.Date as T; } public toString() { return `Date${(this.unit + 1) * 32}<${DateUnit[this.unit]}>`; } protected static [Symbol.toStringTag] = ((proto: Date_) => { (proto).unit = null; @@ -375,9 +372,8 @@ interface Time_ extends DataType { class Time_ extends DataType { constructor(public readonly unit: TimesType[T]['unit'], public readonly bitWidth: TimeBitWidth) { - super(); + super(Type.Time as T); } - public get typeId() { return Type.Time as T; } public toString() { return `Time${this.bitWidth}<${TimeUnit[this.unit]}>`; } public get ArrayType() { switch (this.bitWidth) { @@ -418,9 +414,8 @@ interface Timestamp_ extends DataType { class Timestamp_ extends DataType { constructor(public readonly unit: TimeUnit, public readonly timezone?: string | null) { - super(); + super(Type.Timestamp as T); } - public get typeId() { return Type.Timestamp as T; } public toString() { return `Timestamp<${TimeUnit[this.unit]}${this.timezone ? `, ${this.timezone}` : ``}>`; } protected static [Symbol.toStringTag] = ((proto: Timestamp_) => { (proto).unit = null; @@ -453,9 +448,8 @@ interface Interval_ extends DataType { /** @ignore */ class Interval_ extends DataType { constructor(public readonly unit: IntervalUnit) { - super(); + super(Type.Interval as T); } - public get typeId() { return Type.Interval as T; } public toString() { return `Interval<${IntervalUnit[this.unit]}>`; } protected static [Symbol.toStringTag] = ((proto: Interval_) => { (proto).unit = null; @@ -483,9 +477,8 @@ export interface Duration extends DataType { /** @ignore */ export class Duration extends DataType { constructor(public readonly unit: TimeUnit) { - super(); + super(Type.Duration as T); } - public get typeId() { return Type.Duration as T; } public toString() { return `Duration<${TimeUnit[this.unit]}>`; } protected static [Symbol.toStringTag] = ((proto: Duration) => { (proto).unit = null; @@ -513,11 +506,10 @@ export interface List extends DataType extends DataType { constructor(child: Field) { - super(); + super(Type.List); this.children = [child]; } public declare readonly children: Field[]; - public get typeId() { return Type.List as Type.List; } public toString() { return `List<${this.valueType}>`; } public get valueType(): T { return this.children[0].type as T; } public get valueField(): Field { return this.children[0] as Field; } @@ -540,10 +532,9 @@ export class Struct extends DataType { public declare _row: StructRow; public declare readonly children: Field[]; constructor(children: Field[]) { - super(); + super(Type.Struct); this.children = children; } - public get typeId() { return Type.Struct as Type.Struct; } public toString() { return `Struct<{${this.children.map((f) => `${f.name}:${f.type}`).join(`, `)}}>`; } protected static 
[Symbol.toStringTag] = ((proto: Struct) => { (proto).children = null; @@ -564,13 +555,12 @@ class Union_ extends DataType { constructor(mode: UnionMode, typeIds: number[] | Int32Array, children: Field[]) { - super(); + super(Type.Union as T); this.mode = mode; this.children = children; this.typeIds = typeIds = Int32Array.from(typeIds); this.typeIdToChildIndex = typeIds.reduce((typeIdToChildIndex, typeId, idx) => (typeIdToChildIndex[typeId] = idx) && typeIdToChildIndex || typeIdToChildIndex, Object.create(null) as { [key: number]: number }); } - public get typeId() { return Type.Union as T; } public toString() { return `${this[Symbol.toStringTag]}<${this.children.map((x) => `${x.type}`).join(` | `) }>`; @@ -611,9 +601,8 @@ export interface FixedSizeBinary extends DataType { /** @ignore */ export class FixedSizeBinary extends DataType { constructor(public readonly byteWidth: number) { - super(); + super(Type.FixedSizeBinary); } - public get typeId() { return Type.FixedSizeBinary as Type.FixedSizeBinary; } public toString() { return `FixedSizeBinary[${this.byteWidth}]`; } protected static [Symbol.toStringTag] = ((proto: FixedSizeBinary) => { (proto).byteWidth = null; @@ -632,10 +621,9 @@ export interface FixedSizeList extends DataType extends DataType { public declare readonly children: Field[]; constructor(public readonly listSize: number, child: Field) { - super(); + super(Type.FixedSizeList); this.children = [child]; } - public get typeId() { return Type.FixedSizeList as Type.FixedSizeList; } public get valueType(): T { return this.children[0].type as T; } public get valueField(): Field { return this.children[0] as Field; } public get ArrayType(): T['ArrayType'] { return this.valueType.ArrayType; } @@ -657,7 +645,7 @@ export interface Map_ extends DataType }> { constructor(entries: Field>, keysSorted = false) { - super(); + super(Type.Map); this.children = [entries]; this.keysSorted = keysSorted; // ARROW-8716 @@ -678,7 +666,6 @@ export class Map_ ex } public declare readonly keysSorted: boolean; public declare readonly children: Field>[]; - public get typeId() { return Type.Map as Type.Map; } public get keyType(): TKey { return this.children[0].type.children[0].type as TKey; } public get valueType(): TValue { return this.children[0].type.children[1].type as TValue; } public get childType() { return this.children[0].type as Struct<{ key: TKey; value: TValue }>; } @@ -709,13 +696,12 @@ export class Dictionary ex public declare readonly dictionary: T; public declare readonly isOrdered: boolean; constructor(dictionary: T, indices: TKey, id?: bigint | number | null, isOrdered?: boolean | null) { - super(); + super(Type.Dictionary); this.indices = indices; this.dictionary = dictionary; this.isOrdered = isOrdered || false; this.id = id == null ? getId() : bigIntToNumber(id); } - public get typeId() { return Type.Dictionary as Type.Dictionary; } public get children() { return this.dictionary.children; } public get valueType(): T { return this.dictionary as T; } public get ArrayType(): T['ArrayType'] { return this.dictionary.ArrayType; } From 4d9a860196c2959c8595e117452ef5094ce7363c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 Dec 2023 11:09:10 +0900 Subject: [PATCH 100/570] MINOR: [C#] Bump xunit.runner.visualstudio from 2.5.5 to 2.5.6 in /csharp (#39369) Bumps [xunit.runner.visualstudio](https://github.com/xunit/visualstudio.xunit) from 2.5.5 to 2.5.6.
    [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=xunit.runner.visualstudio&package-manager=nuget&previous-version=2.5.5&new-version=2.5.6)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
    Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- .../Apache.Arrow.Compression.Tests.csproj | 2 +- .../Apache.Arrow.Flight.Sql.Tests.csproj | 2 +- .../Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj | 2 +- csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj index 94ef4b5f3c5f5..e3d86f0dd9992 100644 --- a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj +++ b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj @@ -9,7 +9,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj index 46d0a59b5d8e1..4dd479545a74c 100644 --- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj @@ -8,7 +8,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj index 99c772770d6c6..114e76ad984f1 100644 --- a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj @@ -8,7 +8,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj index fde30a90e6479..71f68fe2d49e3 100644 --- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj +++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj @@ -16,7 +16,7 @@ - + all runtime; build; native; contentfiles; analyzers From 35db6f78a2e2b45e55109979c85649150d205326 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 Dec 2023 11:10:24 +0900 Subject: [PATCH 101/570] MINOR: [Java] Bump org.apache.maven.plugins:maven-surefire-plugin from 3.0.0-M7 to 3.2.3 in /java (#39372) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [org.apache.maven.plugins:maven-surefire-plugin](https://github.com/apache/maven-surefire) from 3.0.0-M7 to 3.2.3.
    Release notes (sourced from org.apache.maven.plugins:maven-surefire-plugin's releases; details truncated):
    • 3.2.2: bug fixes, dependency updates, build
    • 3.2.1: new features and improvements, bug fixes, documentation updates, maintenance
    • 3.1.2: see "Release Notes - Maven Surefire - Version 3.1.2"

    Commits
    • ac9e574 [maven-release-plugin] prepare release surefire-3.2.3
    • 2d6cbc6 [SUREFIRE-2220] SurefireForkChannel#getForkNodeConnectionString() returns inv...
    • 05322d9 [SUREFIRE-2212] OutOfMemoryError raised when parsing files with huge stderr/s...
    • 55ccd06 [SUREFIRE-2211] additionalClasspathElement with UNC path not working with Mav...
    • aa864f4 [SUREFIRE-2216] Upgrade plugins and components (in ITs)
    • 6662e07 [SUREFIRE-2215] Upgrade to Parent 41
    • f5b73ab [SUREFIRE-2214] Uprade to HtmlUnit 3.8.0
    • 47c5816 [SUREFIRE-2210] - Restore ordering of additional class path elements
    • 9b7ecf1 [maven-release-plugin] prepare for next development iteration
    • 2d76753 [maven-release-plugin] prepare release surefire-3.2.2
    • Additional commits viewable in compare view

    [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.maven.plugins:maven-surefire-plugin&package-manager=maven&previous-version=3.0.0-M7&new-version=3.2.3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
    Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/performance/pom.xml | 2 +- java/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/java/performance/pom.xml b/java/performance/pom.xml index a3e4da85b4321..888c0fb367932 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -211,7 +211,7 @@ maven-surefire-plugin - 3.0.0-M7 + 3.2.3 diff --git a/java/pom.xml b/java/pom.xml index 4cca5e7245f0f..27d1504016ee6 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -442,7 +442,7 @@ maven-surefire-plugin - 3.0.0-M7 + 3.2.3 org.junit.jupiter From 9126021e675e7e021a11a90a7ab7d67bd6529712 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 Dec 2023 11:10:49 +0900 Subject: [PATCH 102/570] MINOR: [Java] Bump org.apache.maven.plugins:maven-resources-plugin from 2.6 to 3.3.1 in /java (#39373) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [org.apache.maven.plugins:maven-resources-plugin](https://github.com/apache/maven-resources-plugin) from 2.6 to 3.3.1.
    Release notes (sourced from org.apache.maven.plugins:maven-resources-plugin's releases; details truncated):
    • 3.3.1: removals, dependency updates, documentation updates (doc: adds alternative variable syntax using @ delimiters to the documentation (#36) @ kevin0x90)
    • 3.3.0: dependency updates, documentation updates, maintenance
    • 3.2.0: "What's Changed" and "New Contributors"

    Commits
    • 978ce1e [maven-release-plugin] prepare release maven-resources-plugin-3.3.1
    • b7cd080 [MRESOURCES-296] Upgrade to maven-filtering 3.3.1
    • 1c9f610 [MRESOURCES-288] Make tests-jar reproducible (#56)
    • 1946127 [MRESOURCES-293] Rollback
    • f7a6f22 [MRESOURCES-297] Update to parent POM 39, reformat (#55)
    • 22d64ca remove specific IDE m2e files (#40)
    • 02c2d01 [MRESOURCES-293] Make resources param not read-only (#54)
    • 6bb3e1f [MRESOURCES-295] Drop Plexus legacy code (#53)
    • df7e172 [MRESOURCES-294] Upgrade plexus-utils to 3.5.1
    • 9354ecd Bump apache/maven-gh-actions-shared from 2 to 3
    • Additional commits viewable in compare view

    [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.maven.plugins:maven-resources-plugin&package-manager=maven&previous-version=2.6&new-version=3.3.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
    Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/performance/pom.xml | 2 +- java/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/java/performance/pom.xml b/java/performance/pom.xml index 888c0fb367932..4d449af46b6b1 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -199,7 +199,7 @@
    maven-resources-plugin - 2.6 + 3.3.1 maven-site-plugin diff --git a/java/pom.xml b/java/pom.xml index 27d1504016ee6..1776407e3d030 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -412,7 +412,7 @@ org.apache.maven.plugins maven-resources-plugin - 2.6 + 3.3.1 org.apache.maven.plugins From 6bb77464940bf97dbd042bbf1c6048439f4c0695 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 Dec 2023 14:23:37 +0900 Subject: [PATCH 103/570] MINOR: [C#] Bump xunit from 2.6.3 to 2.6.4 in /csharp (#39370) Bumps [xunit](https://github.com/xunit/xunit) from 2.6.3 to 2.6.4.
    [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=xunit&package-manager=nuget&previous-version=2.6.3&new-version=2.6.4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
    Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- .../Apache.Arrow.Compression.Tests.csproj | 2 +- .../Apache.Arrow.Flight.Sql.Tests.csproj | 2 +- .../Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj | 2 +- csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj index e3d86f0dd9992..dd2c75dd3df90 100644 --- a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj +++ b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj @@ -8,7 +8,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj index 4dd479545a74c..0e9c02d61977c 100644 --- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj @@ -7,7 +7,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj index 114e76ad984f1..d38413ba45b3a 100644 --- a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj @@ -7,7 +7,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj index 71f68fe2d49e3..0afd1490e7b69 100644 --- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj +++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj @@ -15,7 +15,7 @@ - + all runtime; build; native; contentfiles; analyzers From 526b2eb298292849b133f9ddae7facdf8ee1d35f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 26 Dec 2023 14:24:09 +0900 Subject: [PATCH 104/570] MINOR: [Java] Bump org.assertj:assertj-core from 3.23.1 to 3.24.2 in /java (#39375) Bumps org.assertj:assertj-core from 3.23.1 to 3.24.2. [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.assertj:assertj-core&package-manager=maven&previous-version=3.23.1&new-version=3.24.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
    Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index 1776407e3d030..523e5642720cd 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -609,7 +609,7 @@ org.assertj assertj-core - 3.23.1 + 3.24.2 test From b32f71a157eb90a7eb107c540b9cadd343e5e388 Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Tue, 26 Dec 2023 15:25:51 +0900 Subject: [PATCH 105/570] GH-39363: [C++] Use Cast() instead of CastTo() for Parquet (#39364) ### Rationale for this change Remove legacy code ### What changes are included in this PR? Replace the legacy scalar CastTo implementation for Parquet. ### Are these changes tested? Yes. It is passed by existing all test cases for Parquet. ### Are there any user-facing changes? Maybe, Yes. There is a dependency on the Parquet schema that the user handles. There may be a problem if the user has to deal with a type for which Casting is not implemented. However, in this case, it should be treated as a new issue with an implementation that improves the `Cast` compute kernel. * Closes: #39363 Authored-by: Hyunseok Seo Signed-off-by: mwish --- cpp/src/arrow/dataset/file_parquet.cc | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc index 3afe4ec85cf49..1c2fd2dea6307 100644 --- a/cpp/src/arrow/dataset/file_parquet.cc +++ b/cpp/src/arrow/dataset/file_parquet.cc @@ -24,6 +24,7 @@ #include #include +#include "arrow/compute/cast.h" #include "arrow/compute/exec.h" #include "arrow/dataset/dataset_internal.h" #include "arrow/dataset/parquet_encryption_config.h" @@ -58,6 +59,8 @@ using parquet::arrow::SchemaField; using parquet::arrow::SchemaManifest; using parquet::arrow::StatisticsAsScalars; +using compute::Cast; + namespace { parquet::ReaderProperties MakeReaderProperties( @@ -370,12 +373,12 @@ std::optional ParquetFileFragment::EvaluateStatisticsAsExpr return std::nullopt; } - auto maybe_min = min->CastTo(field.type()); - auto maybe_max = max->CastTo(field.type()); + auto maybe_min = Cast(min, field.type()); + auto maybe_max = Cast(max, field.type()); if (maybe_min.ok() && maybe_max.ok()) { - min = maybe_min.MoveValueUnsafe(); - max = maybe_max.MoveValueUnsafe(); + min = maybe_min.MoveValueUnsafe().scalar(); + max = maybe_max.MoveValueUnsafe().scalar(); if (min->Equals(*max)) { auto single_value = compute::equal(field_expr, compute::literal(std::move(min))); From cf44793204d88e0156669af102ff65f180a6b003 Mon Sep 17 00:00:00 2001 From: "Rossi(Ruoxi) Sun" Date: Tue, 26 Dec 2023 09:14:32 -0800 Subject: [PATCH 106/570] GH-39357: [C++] Reduce function.h includes (#39312) ### Rationale for this change As proposed in #36246 , by splitting function option structs from `function.h`, we can reduce the including of `function.h`. So that the total build time could be reduced. The total parser time could be reduced from 722.3s to 709.7s. And the `function.h` along with its transitive inclusion of `kernel.h` don't show up in expensive headers any more. The detailed analysis result before and after this PR are attached: [analyze-before.txt](https://github.com/apache/arrow/files/13756923/analyze-before.txt) [analyze-after.txt](https://github.com/apache/arrow/files/13756924/analyze-after.txt) Disclaimer (quote from https://github.com/apache/arrow/issues/36246#issuecomment-1866974963): > Note that the time diff is not absolute. 
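A note on the call shape introduced by the patch above: `arrow::compute::Cast()` operates on a `Datum` (a `std::shared_ptr<Scalar>` converts to one implicitly) and returns a `Result<Datum>`, which is why the cast result is unwrapped with `.scalar()` before the min/max statistics are compared, whereas the removed `Scalar::CastTo()` handed back a scalar directly. The following is a minimal sketch of that pattern, not code from the patch; it assumes an Arrow C++ build with the compute module enabled, and the helper name `CastStatistic` is purely illustrative.

```cpp
// Minimal sketch: cast a scalar statistic to a target type with the generic
// compute::Cast() kernel and unwrap the resulting Datum back into a Scalar.
#include <memory>

#include "arrow/compute/cast.h"
#include "arrow/datum.h"
#include "arrow/result.h"
#include "arrow/scalar.h"
#include "arrow/type.h"

arrow::Result<std::shared_ptr<arrow::Scalar>> CastStatistic(
    const std::shared_ptr<arrow::Scalar>& value,
    const std::shared_ptr<arrow::DataType>& target_type) {
  // Cast() defaults to CastOptions::Safe(), so an out-of-range or otherwise
  // invalid conversion surfaces as an error Status instead of silently
  // truncating the statistic.
  ARROW_ASSIGN_OR_RAISE(arrow::Datum casted,
                        arrow::compute::Cast(value, target_type));
  return casted.scalar();
}
```

In the dataset code shown in the diff, the callers guard on `maybe_min.ok() && maybe_max.ok()`, so a failed cast simply means no statistics-based guarantee is derived for that column rather than an error being propagated.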
The ClangBuildAnalyzer result differs from time to time. I guess it depends on the idle-ness of the building machine when doing the experiment. But the time reduction is almost certain, though sometimes more sometimes less. And the inclusion times of the questioning headers are reduced for sure, as shown in the attachments in my other comment. ### What changes are included in this PR? Move function option structs into own `compute/options.h`, and change including `function.h` to including `options.h` wherever fits. ### Are these changes tested? Build is testing. ### Are there any user-facing changes? There could be potential build failures for user code (quote from https://github.com/apache/arrow/issues/36246#issuecomment-1866980969): > The header function.h remains in compute/api.h, with and without this PR. The proposed PR removes function.h from api_xxx.h (then includes options.h instead), as proposed in the initial description of this issue. This results in compile failures for user code which includes only compute/api_xxx.h but not compute/api.h, and meanwhile uses CallFunction which is declared in function.h. But I think it's OK as described in https://github.com/apache/arrow/issues/36246#issuecomment-1867018578. * Closes: #39357 Authored-by: zanmato Signed-off-by: Felipe Oliveira Carvalho --- .../arrow/compute_and_write_csv_example.cc | 2 +- cpp/src/arrow/acero/aggregate_internal.cc | 1 + cpp/src/arrow/acero/scalar_aggregate_node.cc | 1 + cpp/src/arrow/compute/api.h | 21 +++-- cpp/src/arrow/compute/api_aggregate.h | 2 +- cpp/src/arrow/compute/api_scalar.h | 2 +- cpp/src/arrow/compute/api_vector.h | 3 +- cpp/src/arrow/compute/cast.h | 1 + cpp/src/arrow/compute/function.cc | 1 + cpp/src/arrow/compute/function.h | 46 +---------- cpp/src/arrow/compute/function_options.h | 81 +++++++++++++++++++ .../kernels/scalar_if_else_benchmark.cc | 1 + cpp/src/arrow/compute/kernels/vector_rank.cc | 1 + .../kernels/vector_replace_benchmark.cc | 1 + .../kernels/vector_run_end_encode_test.cc | 1 + .../arrow/compute/kernels/vector_select_k.cc | 1 + cpp/src/arrow/compute/kernels/vector_sort.cc | 1 + cpp/src/arrow/compute/registry_test.cc | 1 + cpp/src/arrow/compute/type_fwd.h | 1 + 19 files changed, 111 insertions(+), 58 deletions(-) create mode 100644 cpp/src/arrow/compute/function_options.h diff --git a/cpp/examples/arrow/compute_and_write_csv_example.cc b/cpp/examples/arrow/compute_and_write_csv_example.cc index edf21e45b2bb7..7e0f6cdf1ce16 100644 --- a/cpp/examples/arrow/compute_and_write_csv_example.cc +++ b/cpp/examples/arrow/compute_and_write_csv_example.cc @@ -16,7 +16,7 @@ // under the License. 
#include -#include +#include #include #include #include diff --git a/cpp/src/arrow/acero/aggregate_internal.cc b/cpp/src/arrow/acero/aggregate_internal.cc index 3cd5491720dcd..9c4b7fe5ae98c 100644 --- a/cpp/src/arrow/acero/aggregate_internal.cc +++ b/cpp/src/arrow/acero/aggregate_internal.cc @@ -25,6 +25,7 @@ #include "arrow/acero/exec_plan.h" #include "arrow/acero/options.h" #include "arrow/compute/exec.h" +#include "arrow/compute/function.h" #include "arrow/compute/registry.h" #include "arrow/compute/row/grouper.h" #include "arrow/datum.h" diff --git a/cpp/src/arrow/acero/scalar_aggregate_node.cc b/cpp/src/arrow/acero/scalar_aggregate_node.cc index ae59aa692096a..c7805f4d24eb2 100644 --- a/cpp/src/arrow/acero/scalar_aggregate_node.cc +++ b/cpp/src/arrow/acero/scalar_aggregate_node.cc @@ -25,6 +25,7 @@ #include "arrow/acero/options.h" #include "arrow/acero/util.h" #include "arrow/compute/exec.h" +#include "arrow/compute/function.h" #include "arrow/compute/registry.h" #include "arrow/compute/row/grouper.h" #include "arrow/datum.h" diff --git a/cpp/src/arrow/compute/api.h b/cpp/src/arrow/compute/api.h index 5b5dfdf69eb94..b701d9928691f 100644 --- a/cpp/src/arrow/compute/api.h +++ b/cpp/src/arrow/compute/api.h @@ -20,18 +20,23 @@ #pragma once +/// \defgroup compute-functions Abstract compute function API +/// @{ +/// @} + /// \defgroup compute-concrete-options Concrete option classes for compute functions /// @{ /// @} -#include "arrow/compute/api_aggregate.h" // IWYU pragma: export -#include "arrow/compute/api_scalar.h" // IWYU pragma: export -#include "arrow/compute/api_vector.h" // IWYU pragma: export -#include "arrow/compute/cast.h" // IWYU pragma: export -#include "arrow/compute/function.h" // IWYU pragma: export -#include "arrow/compute/kernel.h" // IWYU pragma: export -#include "arrow/compute/registry.h" // IWYU pragma: export -#include "arrow/datum.h" // IWYU pragma: export +#include "arrow/compute/api_aggregate.h" // IWYU pragma: export +#include "arrow/compute/api_scalar.h" // IWYU pragma: export +#include "arrow/compute/api_vector.h" // IWYU pragma: export +#include "arrow/compute/cast.h" // IWYU pragma: export +#include "arrow/compute/function.h" // IWYU pragma: export +#include "arrow/compute/function_options.h" // IWYU pragma: export +#include "arrow/compute/kernel.h" // IWYU pragma: export +#include "arrow/compute/registry.h" // IWYU pragma: export +#include "arrow/datum.h" // IWYU pragma: export #include "arrow/compute/expression.h" // IWYU pragma: export diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h index 3493c3146310d..4d2c814a69bbb 100644 --- a/cpp/src/arrow/compute/api_aggregate.h +++ b/cpp/src/arrow/compute/api_aggregate.h @@ -22,7 +22,7 @@ #include -#include "arrow/compute/function.h" +#include "arrow/compute/function_options.h" #include "arrow/datum.h" #include "arrow/result.h" #include "arrow/util/macros.h" diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 9f12471ddca14..26fbe64f74293 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -24,7 +24,7 @@ #include #include -#include "arrow/compute/function.h" +#include "arrow/compute/function_options.h" #include "arrow/compute/type_fwd.h" #include "arrow/datum.h" #include "arrow/result.h" diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 0233090ef6fb9..759f9e5c1a408 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ 
-20,9 +20,8 @@ #include #include -#include "arrow/compute/function.h" +#include "arrow/compute/function_options.h" #include "arrow/compute/ordering.h" -#include "arrow/datum.h" #include "arrow/result.h" #include "arrow/type_fwd.h" diff --git a/cpp/src/arrow/compute/cast.h b/cpp/src/arrow/compute/cast.h index 613e8a55addd2..18e56092dda2a 100644 --- a/cpp/src/arrow/compute/cast.h +++ b/cpp/src/arrow/compute/cast.h @@ -22,6 +22,7 @@ #include #include "arrow/compute/function.h" +#include "arrow/compute/function_options.h" #include "arrow/compute/type_fwd.h" #include "arrow/result.h" #include "arrow/status.h" diff --git a/cpp/src/arrow/compute/function.cc b/cpp/src/arrow/compute/function.cc index c0433145dd1d0..e1a2e8c5d8879 100644 --- a/cpp/src/arrow/compute/function.cc +++ b/cpp/src/arrow/compute/function.cc @@ -26,6 +26,7 @@ #include "arrow/compute/exec.h" #include "arrow/compute/exec_internal.h" #include "arrow/compute/function_internal.h" +#include "arrow/compute/function_options.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/compute/registry.h" #include "arrow/datum.h" diff --git a/cpp/src/arrow/compute/function.h b/cpp/src/arrow/compute/function.h index 333c9a65c56c4..be934a3c5abfc 100644 --- a/cpp/src/arrow/compute/function.h +++ b/cpp/src/arrow/compute/function.h @@ -36,53 +36,9 @@ namespace arrow { namespace compute { -/// \defgroup compute-functions Abstract compute function API -/// +/// \addtogroup compute-functions /// @{ -/// \brief Extension point for defining options outside libarrow (but -/// still within this project). -class ARROW_EXPORT FunctionOptionsType { - public: - virtual ~FunctionOptionsType() = default; - - virtual const char* type_name() const = 0; - virtual std::string Stringify(const FunctionOptions&) const = 0; - virtual bool Compare(const FunctionOptions&, const FunctionOptions&) const = 0; - virtual Result> Serialize(const FunctionOptions&) const; - virtual Result> Deserialize( - const Buffer& buffer) const; - virtual std::unique_ptr Copy(const FunctionOptions&) const = 0; -}; - -/// \brief Base class for specifying options configuring a function's behavior, -/// such as error handling. -class ARROW_EXPORT FunctionOptions : public util::EqualityComparable { - public: - virtual ~FunctionOptions() = default; - - const FunctionOptionsType* options_type() const { return options_type_; } - const char* type_name() const { return options_type()->type_name(); } - - bool Equals(const FunctionOptions& other) const; - std::string ToString() const; - std::unique_ptr Copy() const; - /// \brief Serialize an options struct to a buffer. - Result> Serialize() const; - /// \brief Deserialize an options struct from a buffer. - /// Note: this will only look for `type_name` in the default FunctionRegistry; - /// to use a custom FunctionRegistry, look up the FunctionOptionsType, then - /// call FunctionOptionsType::Deserialize(). - static Result> Deserialize( - const std::string& type_name, const Buffer& buffer); - - protected: - explicit FunctionOptions(const FunctionOptionsType* type) : options_type_(type) {} - const FunctionOptionsType* options_type_; -}; - -ARROW_EXPORT void PrintTo(const FunctionOptions&, std::ostream*); - /// \brief Contains the number of required arguments for the function. /// /// Naming conventions taken from https://en.wikipedia.org/wiki/Arity. 
diff --git a/cpp/src/arrow/compute/function_options.h b/cpp/src/arrow/compute/function_options.h new file mode 100644 index 0000000000000..88ec2fd2d0679 --- /dev/null +++ b/cpp/src/arrow/compute/function_options.h @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// NOTE: API is EXPERIMENTAL and will change without going through a +// deprecation cycle. + +#pragma once + +#include "arrow/compute/type_fwd.h" +#include "arrow/result.h" +#include "arrow/status.h" +#include "arrow/type_fwd.h" +#include "arrow/util/visibility.h" + +namespace arrow { +namespace compute { + +/// \addtogroup compute-functions +/// @{ + +/// \brief Extension point for defining options outside libarrow (but +/// still within this project). +class ARROW_EXPORT FunctionOptionsType { + public: + virtual ~FunctionOptionsType() = default; + + virtual const char* type_name() const = 0; + virtual std::string Stringify(const FunctionOptions&) const = 0; + virtual bool Compare(const FunctionOptions&, const FunctionOptions&) const = 0; + virtual Result> Serialize(const FunctionOptions&) const; + virtual Result> Deserialize( + const Buffer& buffer) const; + virtual std::unique_ptr Copy(const FunctionOptions&) const = 0; +}; + +/// \brief Base class for specifying options configuring a function's behavior, +/// such as error handling. +class ARROW_EXPORT FunctionOptions : public util::EqualityComparable { + public: + virtual ~FunctionOptions() = default; + + const FunctionOptionsType* options_type() const { return options_type_; } + const char* type_name() const { return options_type()->type_name(); } + + bool Equals(const FunctionOptions& other) const; + std::string ToString() const; + std::unique_ptr Copy() const; + /// \brief Serialize an options struct to a buffer. + Result> Serialize() const; + /// \brief Deserialize an options struct from a buffer. + /// Note: this will only look for `type_name` in the default FunctionRegistry; + /// to use a custom FunctionRegistry, look up the FunctionOptionsType, then + /// call FunctionOptionsType::Deserialize(). 
+ static Result> Deserialize( + const std::string& type_name, const Buffer& buffer); + + protected: + explicit FunctionOptions(const FunctionOptionsType* type) : options_type_(type) {} + const FunctionOptionsType* options_type_; +}; + +ARROW_EXPORT void PrintTo(const FunctionOptions&, std::ostream*); + +/// @} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc index b72402bbccd4e..58bc560f52842 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc @@ -21,6 +21,7 @@ #include "arrow/array/concatenate.h" #include "arrow/array/util.h" #include "arrow/compute/api_scalar.h" +#include "arrow/compute/function.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/util/key_value_metadata.h" diff --git a/cpp/src/arrow/compute/kernels/vector_rank.cc b/cpp/src/arrow/compute/kernels/vector_rank.cc index 780ae25d96360..0cea7246e516c 100644 --- a/cpp/src/arrow/compute/kernels/vector_rank.cc +++ b/cpp/src/arrow/compute/kernels/vector_rank.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include "arrow/compute/function.h" #include "arrow/compute/kernels/vector_sort_internal.h" #include "arrow/compute/registry.h" diff --git a/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc b/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc index 719969d46ea7c..971a841de0773 100644 --- a/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc @@ -18,6 +18,7 @@ #include #include "arrow/array.h" +#include "arrow/datum.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" diff --git a/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc b/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc index 0bd8e3386e7cc..f02aee1b35996 100644 --- a/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc @@ -21,6 +21,7 @@ #include "arrow/array/validate.h" #include "arrow/builder.h" #include "arrow/compute/api_vector.h" +#include "arrow/datum.h" #include "arrow/testing/gtest_util.h" #include "arrow/type_fwd.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/compute/kernels/vector_select_k.cc b/cpp/src/arrow/compute/kernels/vector_select_k.cc index 5000de8996280..1740a9b7f0bb4 100644 --- a/cpp/src/arrow/compute/kernels/vector_select_k.cc +++ b/cpp/src/arrow/compute/kernels/vector_select_k.cc @@ -17,6 +17,7 @@ #include +#include "arrow/compute/function.h" #include "arrow/compute/kernels/vector_sort_internal.h" #include "arrow/compute/registry.h" diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc index 8ddcbb9905cb2..e08a2bc10372f 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort.cc @@ -17,6 +17,7 @@ #include +#include "arrow/compute/function.h" #include "arrow/compute/kernels/vector_sort_internal.h" #include "arrow/compute/registry.h" diff --git a/cpp/src/arrow/compute/registry_test.cc b/cpp/src/arrow/compute/registry_test.cc index 7fee136de7a0b..2d69f119df1f4 100644 --- a/cpp/src/arrow/compute/registry_test.cc +++ b/cpp/src/arrow/compute/registry_test.cc @@ -22,6 +22,7 @@ #include #include "arrow/compute/function.h" +#include 
"arrow/compute/function_options.h" #include "arrow/compute/registry.h" #include "arrow/result.h" #include "arrow/status.h" diff --git a/cpp/src/arrow/compute/type_fwd.h b/cpp/src/arrow/compute/type_fwd.h index 3f990b1814311..89f32ceb0f906 100644 --- a/cpp/src/arrow/compute/type_fwd.h +++ b/cpp/src/arrow/compute/type_fwd.h @@ -27,6 +27,7 @@ struct TypeHolder; namespace compute { class Function; +class ScalarAggregateFunction; class FunctionExecutor; class FunctionOptions; class FunctionRegistry; From ae627c09b08dbd9b4faac545170f4706645ca4ce Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 27 Dec 2023 15:06:23 +0100 Subject: [PATCH 107/570] GH-39251: [JS] Use resizable buffer in builder (#39252) --- js/src/builder.ts | 2 +- js/src/builder/binary.ts | 4 ++-- js/src/builder/buffer.ts | 44 +++++++++++++++++++++++++------------ js/src/builder/largeutf8.ts | 2 +- js/src/builder/union.ts | 4 ++-- js/src/builder/utf8.ts | 2 +- 6 files changed, 37 insertions(+), 21 deletions(-) diff --git a/js/src/builder.ts b/js/src/builder.ts index a4e2d4d89325c..1880db3818ca5 100644 --- a/js/src/builder.ts +++ b/js/src/builder.ts @@ -342,7 +342,7 @@ export abstract class Builder { export abstract class FixedWidthBuilder extends Builder { constructor(opts: BuilderOptions) { super(opts); - this._values = new DataBufferBuilder(new this.ArrayType(0), this.stride); + this._values = new DataBufferBuilder(this.ArrayType, 0, this.stride); } public setValue(index: number, value: T['TValue']) { const values = this._values; diff --git a/js/src/builder/binary.ts b/js/src/builder/binary.ts index 3c12ddf34abb0..fa9a11b24ec39 100644 --- a/js/src/builder/binary.ts +++ b/js/src/builder/binary.ts @@ -16,15 +16,15 @@ // under the License. import { Binary } from '../type.js'; -import { toUint8Array } from '../util/buffer.js'; import { BufferBuilder } from './buffer.js'; import { VariableWidthBuilder, BuilderOptions } from '../builder.js'; +import { toUint8Array } from '../util/buffer.js'; /** @ignore */ export class BinaryBuilder extends VariableWidthBuilder { constructor(opts: BuilderOptions) { super(opts); - this._values = new BufferBuilder(new Uint8Array(0)); + this._values = new BufferBuilder(Uint8Array); } public get byteLength(): number { let size = this._pendingLength + (this.length * 4); diff --git a/js/src/builder/buffer.ts b/js/src/builder/buffer.ts index 402172059682c..18c6dcda738b9 100644 --- a/js/src/builder/buffer.ts +++ b/js/src/builder/buffer.ts @@ -24,20 +24,36 @@ function roundLengthUpToNearest64Bytes(len: number, BPE: number) { const bytesMinus1 = Math.ceil(len) * BPE - 1; return ((bytesMinus1 - bytesMinus1 % 64 + 64) || 64) / BPE; } + /** @ignore */ -const sliceOrExtendArray = (arr: T, len = 0) => ( - arr.length >= len ? arr.subarray(0, len) : memcpy(new (arr.constructor as any)(len), arr, 0) -) as T; +function resizeArray(arr: T, len = 0): T { + // TODO: remove when https://github.com/microsoft/TypeScript/issues/54636 is fixed + const buffer = arr.buffer as ArrayBufferLike & { resizable: boolean; resize: (byteLength: number) => void; maxByteLength: number }; + const byteLength = len * arr.BYTES_PER_ELEMENT; + if (buffer.resizable && byteLength <= buffer.maxByteLength) { + buffer.resize(byteLength); + return arr; + } + + // Fallback for non-resizable buffers + return arr.length >= len ? 
+ arr.subarray(0, len) as T : + memcpy(new (arr.constructor as any)(len), arr, 0); +} + +/** @ignore */ +export const SAFE_ARRAY_SIZE = 2 ** 32 - 1; /** @ignore */ export class BufferBuilder { - constructor(buffer: T, stride = 1) { - this.buffer = buffer; + constructor(bufferType: ArrayCtor, initialSize = 0, stride = 1) { + this.length = Math.ceil(initialSize / stride); + // TODO: remove as any when https://github.com/microsoft/TypeScript/issues/54636 is fixed + this.buffer = new bufferType(new (ArrayBuffer as any)(this.length * bufferType.BYTES_PER_ELEMENT, { maxByteLength: SAFE_ARRAY_SIZE })) as T; this.stride = stride; - this.BYTES_PER_ELEMENT = buffer.BYTES_PER_ELEMENT; - this.ArrayType = buffer.constructor as ArrayCtor; - this._resize(this.length = Math.ceil(buffer.length / stride)); + this.BYTES_PER_ELEMENT = bufferType.BYTES_PER_ELEMENT; + this.ArrayType = bufferType; } public buffer: T; @@ -72,17 +88,18 @@ export class BufferBuilder { } public flush(length = this.length) { length = roundLengthUpToNearest64Bytes(length * this.stride, this.BYTES_PER_ELEMENT); - const array = sliceOrExtendArray(this.buffer, length); + const array = resizeArray(this.buffer, length); this.clear(); return array; } public clear() { this.length = 0; - this._resize(0); + // TODO: remove as any when https://github.com/microsoft/TypeScript/issues/54636 is fixed + this.buffer = new this.ArrayType(new (ArrayBuffer as any)(0, { maxByteLength: SAFE_ARRAY_SIZE })) as T; return this; } protected _resize(newLength: number) { - return this.buffer = memcpy(new this.ArrayType(newLength), this.buffer); + return this.buffer = resizeArray(this.buffer, newLength); } } @@ -100,7 +117,7 @@ export class DataBufferBuilder extends Buffe /** @ignore */ export class BitmapBufferBuilder extends DataBufferBuilder { - constructor(data = new Uint8Array(0)) { super(data, 1 / 8); } + constructor() { super(Uint8Array, 0, 1 / 8); } public numValid = 0; public get numInvalid() { return this.length - this.numValid; } @@ -123,9 +140,8 @@ export class BitmapBufferBuilder extends DataBufferBuilder { /** @ignore */ export class OffsetsBufferBuilder extends DataBufferBuilder { constructor(type: T) { - super(new type.OffsetArrayType(1), 1); + super(type.OffsetArrayType as ArrayCtor, 1, 1); } - public append(value: T['TOffsetArray'][0]) { return this.set(this.length - 1, value); } diff --git a/js/src/builder/largeutf8.ts b/js/src/builder/largeutf8.ts index 51890100095c1..90a0bde9f3443 100644 --- a/js/src/builder/largeutf8.ts +++ b/js/src/builder/largeutf8.ts @@ -25,7 +25,7 @@ import { LargeBinaryBuilder } from './largebinary.js'; export class LargeUtf8Builder extends VariableWidthBuilder { constructor(opts: BuilderOptions) { super(opts); - this._values = new BufferBuilder(new Uint8Array(0)); + this._values = new BufferBuilder(Uint8Array); } public get byteLength(): number { let size = this._pendingLength + (this.length * 4); diff --git a/js/src/builder/union.ts b/js/src/builder/union.ts index ac8a13191a549..7bee460a77de1 100644 --- a/js/src/builder/union.ts +++ b/js/src/builder/union.ts @@ -31,7 +31,7 @@ export abstract class UnionBuilder extends Builder constructor(options: UnionBuilderOptions) { super(options); - this._typeIds = new DataBufferBuilder(new Int8Array(0), 1); + this._typeIds = new DataBufferBuilder(Int8Array, 0, 1); if (typeof options['valueToChildTypeId'] === 'function') { this._valueToChildTypeId = options['valueToChildTypeId']; } @@ -84,7 +84,7 @@ export class DenseUnionBuilder extends UnionB constructor(options: 
UnionBuilderOptions) { super(options); - this._offsets = new DataBufferBuilder(new Int32Array(0)); + this._offsets = new DataBufferBuilder(Int32Array); } /** @ignore */ diff --git a/js/src/builder/utf8.ts b/js/src/builder/utf8.ts index 53b8306cbaffd..aac0aec54fe90 100644 --- a/js/src/builder/utf8.ts +++ b/js/src/builder/utf8.ts @@ -25,7 +25,7 @@ import { VariableWidthBuilder, BuilderOptions } from '../builder.js'; export class Utf8Builder extends VariableWidthBuilder { constructor(opts: BuilderOptions) { super(opts); - this._values = new BufferBuilder(new Uint8Array(0)); + this._values = new BufferBuilder(Uint8Array); } public get byteLength(): number { let size = this._pendingLength + (this.length * 4); From 9e33d12f1b022c902cc831026ceb3e0016ca4b3c Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Wed, 27 Dec 2023 10:10:46 -0800 Subject: [PATCH 108/570] GH-39341: [C#] Support Utf8View, BinaryView and ListView (#39342) ### What changes are included in this PR? Support for reading, writing and representing Utf8View, BinaryView and ListView. ### Are these changes tested? Yes ### Are there any user-facing changes? New classes and APIs for Utf8View, BinaryView and ListView. * Closes: #39341 Authored-by: Curt Hagenlocher Signed-off-by: Curt Hagenlocher --- .../Arrays/ArrayDataConcatenator.cs | 102 +++++- .../Arrays/ArrowArrayBuilderFactory.cs | 6 + .../Apache.Arrow/Arrays/ArrowArrayFactory.cs | 6 + .../Apache.Arrow/Arrays/BinaryViewArray.cs | 344 ++++++++++++++++++ .../src/Apache.Arrow/Arrays/ListViewArray.cs | 217 +++++++++++ .../Apache.Arrow/Arrays/StringViewArray.cs | 110 ++++++ .../src/Apache.Arrow/C/CArrowArrayExporter.cs | 22 +- .../src/Apache.Arrow/C/CArrowArrayImporter.cs | 48 +++ .../Apache.Arrow/C/CArrowSchemaExporter.cs | 3 + .../Apache.Arrow/C/CArrowSchemaImporter.cs | 6 +- .../Extensions/ArrayDataExtensions.cs | 11 + .../Extensions/FlatbufExtensions.cs | 19 - csharp/src/Apache.Arrow/Flatbuf/BinaryView.cs | 47 +++ .../Flatbuf/Enums/MetadataVersion.cs | 12 +- csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs | 16 + csharp/src/Apache.Arrow/Flatbuf/Field.cs | 4 + .../src/Apache.Arrow/Flatbuf/LargeListView.cs | 42 +++ csharp/src/Apache.Arrow/Flatbuf/ListView.cs | 43 +++ .../src/Apache.Arrow/Flatbuf/RecordBatch.cs | 37 +- .../src/Apache.Arrow/Flatbuf/SparseTensor.cs | 4 + csharp/src/Apache.Arrow/Flatbuf/Tensor.cs | 4 + csharp/src/Apache.Arrow/Flatbuf/Utf8View.cs | 47 +++ .../Ipc/ArrowReaderImplementation.cs | 79 ++-- .../src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 57 ++- .../Ipc/ArrowTypeFlatbufferBuilder.cs | 39 ++ .../src/Apache.Arrow/Ipc/MessageSerializer.cs | 10 + csharp/src/Apache.Arrow/Scalars/BinaryView.cs | 111 ++++++ .../src/Apache.Arrow/Types/BinaryViewType.cs | 28 ++ csharp/src/Apache.Arrow/Types/IArrowType.cs | 3 + csharp/src/Apache.Arrow/Types/ListViewType.cs | 35 ++ .../src/Apache.Arrow/Types/StringViewType.cs | 28 ++ .../ArrowWriterBenchmark.cs | 2 +- .../Apache.Arrow.IntegrationTest/JsonFile.cs | 156 +++++++- .../Properties/launchSettings.json | 8 + .../ArrowArrayConcatenatorTests.cs | 89 +++++ .../Apache.Arrow.Tests/ArrowReaderVerifier.cs | 61 ++++ .../Apache.Arrow.Tests/BinaryViewTests.cs | 89 +++++ .../CDataInterfacePythonTests.cs | 4 +- csharp/test/Apache.Arrow.Tests/TableTests.cs | 6 +- csharp/test/Apache.Arrow.Tests/TestData.cs | 198 ++++++++-- dev/archery/archery/integration/datagen.py | 3 +- docs/source/status.rst | 10 +- 42 files changed, 2017 insertions(+), 149 deletions(-) create mode 100644 csharp/src/Apache.Arrow/Arrays/BinaryViewArray.cs create mode 100644 
csharp/src/Apache.Arrow/Arrays/ListViewArray.cs create mode 100644 csharp/src/Apache.Arrow/Arrays/StringViewArray.cs create mode 100644 csharp/src/Apache.Arrow/Flatbuf/BinaryView.cs create mode 100644 csharp/src/Apache.Arrow/Flatbuf/LargeListView.cs create mode 100644 csharp/src/Apache.Arrow/Flatbuf/ListView.cs create mode 100644 csharp/src/Apache.Arrow/Flatbuf/Utf8View.cs create mode 100644 csharp/src/Apache.Arrow/Scalars/BinaryView.cs create mode 100644 csharp/src/Apache.Arrow/Types/BinaryViewType.cs create mode 100644 csharp/src/Apache.Arrow/Types/ListViewType.cs create mode 100644 csharp/src/Apache.Arrow/Types/StringViewType.cs create mode 100644 csharp/test/Apache.Arrow.IntegrationTest/Properties/launchSettings.json create mode 100644 csharp/test/Apache.Arrow.Tests/BinaryViewTests.cs diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs index 463ca49e29c94..698d74e4bac84 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs @@ -14,6 +14,7 @@ // limitations under the License. using Apache.Arrow.Memory; +using Apache.Arrow.Scalars; using Apache.Arrow.Types; using System; using System.Collections.Generic; @@ -46,8 +47,11 @@ private class ArrayDataConcatenationVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -84,17 +88,50 @@ public void Visit(FixedWidthType type) { CheckData(type, 2); ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); - ArrowBuffer valueBuffer = ConcatenateFixedWidthTypeValueBuffer(type); + ArrowBuffer valueBuffer = ConcatenateFixedWidthTypeValueBuffer(1, type); Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, valueBuffer }); } public void Visit(BinaryType type) => ConcatenateVariableBinaryArrayData(type); + public void Visit(BinaryViewType type) => ConcatenateBinaryViewArrayData(type); + public void Visit(StringType type) => ConcatenateVariableBinaryArrayData(type); + public void Visit(StringViewType type) => ConcatenateBinaryViewArrayData(type); + public void Visit(ListType type) => ConcatenateLists(type); + public void Visit(ListViewType type) + { + CheckData(type, 3); + ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); + + var offsetsBuilder = new ArrowBuffer.Builder(_totalLength); + int baseOffset = 0; + + foreach (ArrayData arrayData in _arrayDataList) + { + if (arrayData.Length > 0) + { + ReadOnlySpan span = arrayData.Buffers[1].Span.CastTo().Slice(0, arrayData.Length); + foreach (int offset in span) + { + offsetsBuilder.Append(baseOffset + offset); + } + } + + baseOffset += arrayData.Children[0].Length; + } + + ArrowBuffer offsetBuffer = offsetsBuilder.Build(_allocator); + ArrowBuffer sizesBuffer = ConcatenateFixedWidthTypeValueBuffer(2, Int32Type.Default); + ArrayData child = Concatenate(SelectChildren(0), _allocator); + + Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer, sizesBuffer }, new[] { child }); + } + public void Visit(FixedSizeListType type) { CheckData(type, 1); @@ -161,6 +198,15 @@ private void CheckData(IArrowType type, int expectedBufferCount) } } + private void CheckDataVariadicCount(IArrowType type, int expectedBufferCount) + { + foreach (ArrayData arrayData in _arrayDataList) + { + 
arrayData.EnsureDataType(type.TypeId); + arrayData.EnsureVariadicBufferCount(expectedBufferCount); + } + } + private void ConcatenateVariableBinaryArrayData(IArrowType type) { CheckData(type, 3); @@ -171,6 +217,26 @@ private void ConcatenateVariableBinaryArrayData(IArrowType type) Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer, valueBuffer }); } + private void ConcatenateBinaryViewArrayData(IArrowType type) + { + CheckDataVariadicCount(type, 2); + ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); + ArrowBuffer viewBuffer = ConcatenateViewBuffer(out int variadicBufferCount); + ArrowBuffer[] buffers = new ArrowBuffer[2 + variadicBufferCount]; + buffers[0] = validityBuffer; + buffers[1] = viewBuffer; + int index = 2; + foreach (ArrayData arrayData in _arrayDataList) + { + for (int i = 2; i < arrayData.Buffers.Length; i++) + { + buffers[index++] = arrayData.Buffers[i]; + } + } + + Result = new ArrayData(type, _totalLength, _totalNullCount, 0, buffers); + } + private void ConcatenateLists(NestedType type) { CheckData(type, 2); @@ -206,7 +272,7 @@ private ArrowBuffer ConcatenateBitmapBuffer(int bufferIndex) return builder.Build(_allocator); } - private ArrowBuffer ConcatenateFixedWidthTypeValueBuffer(FixedWidthType type) + private ArrowBuffer ConcatenateFixedWidthTypeValueBuffer(int bufferIndex, FixedWidthType type) { int typeByteWidth = type.BitWidth / 8; var builder = new ArrowBuffer.Builder(_totalLength * typeByteWidth); @@ -216,7 +282,7 @@ private ArrowBuffer ConcatenateFixedWidthTypeValueBuffer(FixedWidthType type) int length = arrayData.Length; int byteLength = length * typeByteWidth; - builder.Append(arrayData.Buffers[1].Span.Slice(0, byteLength)); + builder.Append(arrayData.Buffers[bufferIndex].Span.Slice(0, byteLength)); } return builder.Build(_allocator); @@ -265,6 +331,36 @@ private ArrowBuffer ConcatenateOffsetBuffer() return builder.Build(_allocator); } + private ArrowBuffer ConcatenateViewBuffer(out int variadicBufferCount) + { + var builder = new ArrowBuffer.Builder(_totalLength); + variadicBufferCount = 0; + foreach (ArrayData arrayData in _arrayDataList) + { + if (arrayData.Length == 0) + { + continue; + } + + ReadOnlySpan span = arrayData.Buffers[1].Span.CastTo().Slice(0, arrayData.Length); + foreach (BinaryView view in span) + { + if (view.Length > BinaryView.MaxInlineLength) + { + builder.Append(view.AdjustBufferIndex(variadicBufferCount)); + } + else + { + builder.Append(view); + } + } + + variadicBufferCount += (arrayData.Buffers.Length - 2); + } + + return builder.Build(_allocator); + } + private ArrowBuffer ConcatenateUnionTypeBuffer() { var builder = new ArrowBuffer.Builder(_totalLength); diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs b/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs index af5a524798396..f8367102082f5 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs @@ -54,8 +54,12 @@ internal static IArrowArrayBuilder> return new DoubleArray.Builder(); case ArrowTypeId.String: return new StringArray.Builder(); + case ArrowTypeId.StringView: + return new StringViewArray.Builder(); case ArrowTypeId.Binary: return new BinaryArray.Builder(); + case ArrowTypeId.BinaryView: + return new BinaryViewArray.Builder(); case ArrowTypeId.Timestamp: return new TimestampArray.Builder(); case ArrowTypeId.Date64: @@ -70,6 +74,8 @@ internal static IArrowArrayBuilder> return new 
DurationArray.Builder(dataType as DurationType); case ArrowTypeId.List: return new ListArray.Builder(dataType as ListType); + case ArrowTypeId.ListView: + return new ListViewArray.Builder(dataType as ListViewType); case ArrowTypeId.FixedSizeList: return new FixedSizeListArray.Builder(dataType as FixedSizeListType); case ArrowTypeId.Decimal128: diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs index d6577260bb82d..3d2ab1d2129f1 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs @@ -51,14 +51,20 @@ public static IArrowArray BuildArray(ArrayData data) return new DoubleArray(data); case ArrowTypeId.String: return new StringArray(data); + case ArrowTypeId.StringView: + return new StringViewArray(data); case ArrowTypeId.FixedSizedBinary: return new FixedSizeBinaryArray(data); case ArrowTypeId.Binary: return new BinaryArray(data); + case ArrowTypeId.BinaryView: + return new BinaryViewArray(data); case ArrowTypeId.Timestamp: return new TimestampArray(data); case ArrowTypeId.List: return new ListArray(data); + case ArrowTypeId.ListView: + return new ListViewArray(data); case ArrowTypeId.Map: return new MapArray(data); case ArrowTypeId.Struct: diff --git a/csharp/src/Apache.Arrow/Arrays/BinaryViewArray.cs b/csharp/src/Apache.Arrow/Arrays/BinaryViewArray.cs new file mode 100644 index 0000000000000..4f62dffd1ddeb --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/BinaryViewArray.cs @@ -0,0 +1,344 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +using Apache.Arrow.Memory; +using Apache.Arrow.Scalars; +using Apache.Arrow.Types; +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Collections; + +namespace Apache.Arrow +{ + public class BinaryViewArray : Array, IReadOnlyList + { + public class Builder : BuilderBase + { + public Builder() : base(BinaryViewType.Default) { } + public Builder(IArrowType dataType) : base(dataType) { } + + protected override BinaryViewArray Build(ArrayData data) + { + return new BinaryViewArray(data); + } + } + + public BinaryViewArray(ArrayData data) + : base(data) + { + data.EnsureDataType(ArrowTypeId.BinaryView); + data.EnsureVariadicBufferCount(2); + } + + public BinaryViewArray(ArrowTypeId typeId, ArrayData data) + : base(data) + { + data.EnsureDataType(typeId); + data.EnsureVariadicBufferCount(2); + } + + public abstract class BuilderBase : IArrowArrayBuilder + where TArray : IArrowArray + where TBuilder : class, IArrowArrayBuilder + { + protected IArrowType DataType { get; } + protected TBuilder Instance => this as TBuilder; + protected ArrowBuffer.Builder BinaryViews { get; } + protected ArrowBuffer.Builder ValueBuffer { get; } + protected ArrowBuffer.BitmapBuilder ValidityBuffer { get; } + protected int NullCount => this.ValidityBuffer.UnsetBitCount; + + protected BuilderBase(IArrowType dataType) + { + DataType = dataType; + BinaryViews = new ArrowBuffer.Builder(); + ValueBuffer = new ArrowBuffer.Builder(); + ValidityBuffer = new ArrowBuffer.BitmapBuilder(); + } + + protected abstract TArray Build(ArrayData data); + + /// + /// Gets the length of the array built so far. + /// + public int Length => BinaryViews.Length; + + /// + /// Build an Arrow array from the appended contents so far. + /// + /// Optional memory allocator. + /// Returns an array of type . + public TArray Build(MemoryAllocator allocator = default) + { + bool hasValues = ValueBuffer.Length > 0; + var bufs = new ArrowBuffer[hasValues ? 3 : 2]; + bufs[0] = NullCount > 0 ? ValidityBuffer.Build(allocator) : ArrowBuffer.Empty; + bufs[1] = BinaryViews.Build(allocator); + if (hasValues) { bufs[2] = ValueBuffer.Build(allocator); } + + var data = new ArrayData( + DataType, + length: Length, + NullCount, + offset: 0, + bufs); + + return Build(data); + } + + /// + /// Append a single null value to the array. + /// + /// Returns the builder (for fluent-style composition). + public TBuilder AppendNull() + { + // Do not add to the value buffer in the case of a null. + // Note that we do not need to increment the offset as a result. + ValidityBuffer.Append(false); + BinaryViews.Append(default(BinaryView)); + return Instance; + } + + /// + /// Appends a value, consisting of a single byte, to the array. + /// + /// Byte value to append. + /// Returns the builder (for fluent-style composition). + public TBuilder Append(byte value) + { + ValidityBuffer.Append(true); + Span buf = stackalloc[] { value }; + BinaryViews.Append(new BinaryView(buf)); + return Instance; + } + + /// + /// Append a value, consisting of a span of bytes, to the array. + /// + /// + /// Note that a single value is added, which consists of arbitrarily many bytes. If multiple values are + /// to be added, use the method. + /// + /// Span of bytes to add. + /// Returns the builder (for fluent-style composition). 
+ public TBuilder Append(ReadOnlySpan span) + { + if (span.Length > BinaryView.MaxInlineLength) + { + int offset = ValueBuffer.Length; + ValueBuffer.Append(span); + BinaryViews.Append(new BinaryView(span.Length, span.Slice(0, 4), 0, offset)); + } + else + { + BinaryViews.Append(new BinaryView(span)); + } + ValidityBuffer.Append(true); + return Instance; + } + + /// + /// Append an enumerable collection of single-byte values to the array. + /// + /// + /// Note that this method appends multiple values, each of which is a single byte + /// + /// Single-byte values to add. + /// Returns the builder (for fluent-style composition). + public TBuilder AppendRange(IEnumerable values) + { + if (values == null) + { + throw new ArgumentNullException(nameof(values)); + } + + foreach (byte b in values) + { + Append(b); + } + + return Instance; + } + + /// + /// Append an enumerable collection of values to the array. + /// + /// Values to add. + /// Returns the builder (for fluent-style composition). + public TBuilder AppendRange(IEnumerable values) + { + if (values == null) + { + throw new ArgumentNullException(nameof(values)); + } + + foreach (byte[] arr in values) + { + if (arr == null) + { + AppendNull(); + } + else + { + Append((ReadOnlySpan)arr); + } + } + + return Instance; + } + + public TBuilder Reserve(int capacity) + { + // TODO: [ARROW-9366] Reserve capacity in the value buffer in a more sensible way. + BinaryViews.Reserve(capacity); + ValueBuffer.Reserve(capacity); + ValidityBuffer.Reserve(capacity); + return Instance; + } + + public TBuilder Resize(int length) + { + // TODO: [ARROW-9366] Resize the value buffer to a safe length based on offsets, not `length`. + BinaryViews.Resize(length); + ValueBuffer.Resize(length); + ValidityBuffer.Resize(length); + return Instance; + } + + public TBuilder Swap(int i, int j) + { + ValidityBuffer.Swap(i, j); + BinaryView view = BinaryViews.Span[i]; + BinaryViews.Span[i] = BinaryViews.Span[j]; + BinaryViews.Span[j] = view; + return Instance; + } + + public TBuilder Set(int index, byte value) + { + // TODO: Implement + throw new NotImplementedException(); + } + + /// + /// Clear all contents appended so far. + /// + /// Returns the builder (for fluent-style composition). + public TBuilder Clear() + { + BinaryViews.Clear(); + ValueBuffer.Clear(); + ValidityBuffer.Clear(); + return Instance; + } + } + + public BinaryViewArray(IArrowType dataType, int length, + ArrowBuffer binaryViewsBuffer, + ArrowBuffer dataBuffer, + ArrowBuffer nullBitmapBuffer, + int nullCount = 0, int offset = 0) + : this(new ArrayData(dataType, length, nullCount, offset, + new[] { nullBitmapBuffer, binaryViewsBuffer, dataBuffer })) + { } + + public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); + + public ArrowBuffer ViewsBuffer => Data.Buffers[1]; + + public int DataBufferCount => Data.Buffers.Length - 2; + + public ArrowBuffer DataBuffer(int index) => Data.Buffers[index + 2]; + + public ReadOnlySpan Views => ViewsBuffer.Span.CastTo().Slice(Offset, Length); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetValueLength(int index) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + if (!IsValid(index)) + { + return 0; + } + + return Views[index].Length; + } + + /// + /// Get the collection of bytes, as a read-only span, at a given index in the array. 
+ /// + /// + /// Note that this method cannot reliably identify null values, which are indistinguishable from empty byte + /// collection values when seen in the context of this method's return type of . + /// Use the method or the overload instead + /// to reliably determine null values. + /// + /// Index at which to get bytes. + /// Returns a object. + /// If the index is negative or beyond the length of the array. + /// + public ReadOnlySpan GetBytes(int index) => GetBytes(index, out _); + + /// + /// Get the collection of bytes, as a read-only span, at a given index in the array. + /// + /// Index at which to get bytes. + /// Set to if the value at the given index is null. + /// Returns a object. + /// If the index is negative or beyond the length of the array. + /// + public ReadOnlySpan GetBytes(int index, out bool isNull) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + isNull = IsNull(index); + + if (isNull) + { + // Note that `return null;` is valid syntax, but would be misleading as `null` in the context of a span + // is actually returned as an empty span. + return ReadOnlySpan.Empty; + } + + BinaryView binaryView = Views[index]; + if (binaryView.IsInline) + { + return ViewsBuffer.Span.Slice(16 * index + 4, binaryView.Length); + } + + return DataBuffer(binaryView._bufferIndex).Span.Slice(binaryView._bufferOffset, binaryView.Length); + } + + int IReadOnlyCollection.Count => Length; + byte[] IReadOnlyList.this[int index] => GetBytes(index).ToArray(); + + IEnumerator IEnumerable.GetEnumerator() + { + for (int index = 0; index < Length; index++) + { + yield return GetBytes(index).ToArray(); + } + } + + IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/ListViewArray.cs b/csharp/src/Apache.Arrow/Arrays/ListViewArray.cs new file mode 100644 index 0000000000000..081385d9211a4 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/ListViewArray.cs @@ -0,0 +1,217 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +using System; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow +{ + public class ListViewArray : Array + { + public class Builder : IArrowArrayBuilder + { + public IArrowArrayBuilder> ValueBuilder { get; } + + public int Length => ValueOffsetsBufferBuilder.Length; + + private ArrowBuffer.Builder ValueOffsetsBufferBuilder { get; } + + private ArrowBuffer.Builder SizesBufferBuilder { get; } + + private ArrowBuffer.BitmapBuilder ValidityBufferBuilder { get; } + + public int NullCount { get; protected set; } + + private IArrowType DataType { get; } + + private int Start { get; set; } + + public Builder(IArrowType valueDataType) : this(new ListViewType(valueDataType)) + { + } + + public Builder(Field valueField) : this(new ListViewType(valueField)) + { + } + + internal Builder(ListViewType dataType) + { + ValueBuilder = ArrowArrayBuilderFactory.Build(dataType.ValueDataType); + ValueOffsetsBufferBuilder = new ArrowBuffer.Builder(); + SizesBufferBuilder = new ArrowBuffer.Builder(); + ValidityBufferBuilder = new ArrowBuffer.BitmapBuilder(); + DataType = dataType; + Start = -1; + } + + /// + /// Start a new variable-length list slot + /// + /// This function should be called before beginning to append elements to the + /// value builder. TODO: Consider adding builder APIs to support construction + /// of overlapping lists. + /// + public Builder Append() + { + AppendPrevious(); + + ValidityBufferBuilder.Append(true); + + return this; + } + + public Builder AppendNull() + { + AppendPrevious(); + + ValidityBufferBuilder.Append(false); + ValueOffsetsBufferBuilder.Append(Start); + SizesBufferBuilder.Append(0); + NullCount++; + Start = -1; + + return this; + } + + private void AppendPrevious() + { + if (Start >= 0) + { + ValueOffsetsBufferBuilder.Append(Start); + SizesBufferBuilder.Append(ValueBuilder.Length - Start); + } + Start = ValueBuilder.Length; + } + + public ListViewArray Build(MemoryAllocator allocator = default) + { + AppendPrevious(); + + ArrowBuffer validityBuffer = NullCount > 0 + ? 
ValidityBufferBuilder.Build(allocator) + : ArrowBuffer.Empty; + + return new ListViewArray(DataType, Length, + ValueOffsetsBufferBuilder.Build(allocator), SizesBufferBuilder.Build(allocator), + ValueBuilder.Build(allocator), + validityBuffer, NullCount, 0); + } + + public Builder Reserve(int capacity) + { + ValueOffsetsBufferBuilder.Reserve(capacity); + SizesBufferBuilder.Reserve(capacity); + ValidityBufferBuilder.Reserve(capacity); + return this; + } + + public Builder Resize(int length) + { + ValueOffsetsBufferBuilder.Resize(length); + SizesBufferBuilder.Resize(length); + ValidityBufferBuilder.Resize(length); + return this; + } + + public Builder Clear() + { + ValueOffsetsBufferBuilder.Clear(); + SizesBufferBuilder.Clear(); + ValueBuilder.Clear(); + ValidityBufferBuilder.Clear(); + return this; + } + + } + + public IArrowArray Values { get; } + + public ArrowBuffer ValueOffsetsBuffer => Data.Buffers[1]; + + public ReadOnlySpan ValueOffsets => ValueOffsetsBuffer.Span.CastTo().Slice(Offset, Length); + + public ArrowBuffer SizesBuffer => Data.Buffers[2]; + + public ReadOnlySpan Sizes => SizesBuffer.Span.CastTo().Slice(Offset, Length); + + public ListViewArray(IArrowType dataType, int length, + ArrowBuffer valueOffsetsBuffer, ArrowBuffer sizesBuffer, IArrowArray values, + ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) + : this(new ArrayData(dataType, length, nullCount, offset, + new[] { nullBitmapBuffer, valueOffsetsBuffer, sizesBuffer }, new[] { values.Data }), + values) + { + } + + public ListViewArray(ArrayData data) + : this(data, ArrowArrayFactory.BuildArray(data.Children[0])) + { + } + + private ListViewArray(ArrayData data, IArrowArray values) : base(data) + { + data.EnsureBufferCount(3); + data.EnsureDataType(ArrowTypeId.ListView); + Values = values; + } + + public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); + + public int GetValueLength(int index) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + if (IsNull(index)) + { + return 0; + } + + return Sizes[index]; + } + + public IArrowArray GetSlicedValues(int index) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + if (IsNull(index)) + { + return null; + } + + if (!(Values is Array array)) + { + return default; + } + + return array.Slice(ValueOffsets[index], GetValueLength(index)); + } + + protected override void Dispose(bool disposing) + { + if (disposing) + { + Values?.Dispose(); + } + base.Dispose(disposing); + } + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/StringViewArray.cs b/csharp/src/Apache.Arrow/Arrays/StringViewArray.cs new file mode 100644 index 0000000000000..88644761535d9 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/StringViewArray.cs @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +using Apache.Arrow.Types; +using System; +using System.Collections; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Text; + +namespace Apache.Arrow +{ + public class StringViewArray: BinaryViewArray, IReadOnlyList + { + public static readonly Encoding DefaultEncoding = Encoding.UTF8; + + public new class Builder : BuilderBase + { + public Builder() : base(StringViewType.Default) { } + + protected override StringViewArray Build(ArrayData data) + { + return new StringViewArray(data); + } + + public Builder Append(string value, Encoding encoding = null) + { + if (value == null) + { + return AppendNull(); + } + encoding = encoding ?? DefaultEncoding; + byte[] span = encoding.GetBytes(value); + return Append(span.AsSpan()); + } + + public Builder AppendRange(IEnumerable values, Encoding encoding = null) + { + foreach (string value in values) + { + Append(value, encoding); + } + + return this; + } + } + + public StringViewArray(ArrayData data) + : base(ArrowTypeId.StringView, data) { } + + public StringViewArray(int length, + ArrowBuffer valueOffsetsBuffer, + ArrowBuffer dataBuffer, + ArrowBuffer nullBitmapBuffer, + int nullCount = 0, int offset = 0) + : this(new ArrayData(StringViewType.Default, length, nullCount, offset, + new[] { nullBitmapBuffer, valueOffsetsBuffer, dataBuffer })) + { } + + public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); + + public string GetString(int index, Encoding encoding = default) + { + encoding ??= DefaultEncoding; + + ReadOnlySpan bytes = GetBytes(index, out bool isNull); + + if (isNull) + { + return null; + } + if (bytes.Length == 0) + { + return string.Empty; + } + + unsafe + { + fixed (byte* data = &MemoryMarshal.GetReference(bytes)) + return encoding.GetString(data, bytes.Length); + } + } + + int IReadOnlyCollection.Count => Length; + + string IReadOnlyList.this[int index] => GetString(index); + + IEnumerator IEnumerable.GetEnumerator() + { + for (int index = 0; index < Length; index++) + { + yield return GetString(index); + }; + } + + IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); + } +} diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs index 2d9febea33f54..03059eaf5d4df 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs @@ -15,10 +15,12 @@ using System; +using System.Buffers; using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using Apache.Arrow.Memory; +using Apache.Arrow.Types; namespace Apache.Arrow.C { @@ -121,7 +123,16 @@ private unsafe static void ConvertArray(ExportedAllocationOwner sharedOwner, Arr cArray->buffers = null; if (cArray->n_buffers > 0) { - cArray->buffers = (byte**)sharedOwner.Allocate(array.Buffers.Length * IntPtr.Size); + long* lengths = null; + int bufferCount = array.Buffers.Length; + if (array.DataType.TypeId == ArrowTypeId.BinaryView || array.DataType.TypeId == ArrowTypeId.StringView) + { + lengths = (long*)sharedOwner.Allocate(8 * bufferCount); // overallocation to avoid edge case + bufferCount++; + cArray->n_buffers++; + } + + cArray->buffers = (byte**)sharedOwner.Allocate(bufferCount * IntPtr.Size); for (int i = 0; i < array.Buffers.Length; i++) { ArrowBuffer buffer = array.Buffers[i]; @@ -131,6 +142,15 @@ private unsafe static void 
ConvertArray(ExportedAllocationOwner sharedOwner, Arr throw new NotSupportedException($"An ArrowArray of type {array.DataType.TypeId} could not be exported: failed on buffer #{i}"); } cArray->buffers[i] = (byte*)ptr; + if (lengths != null && i >= 2) + { + lengths[i - 2] = array.Buffers[i].Length; + } + } + + if (lengths != null) + { + cArray->buffers[array.Buffers.Length] = (byte*)lengths; } } diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs index 1b40ec49658bb..fbb2be661fc5d 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs @@ -157,10 +157,18 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type) case ArrowTypeId.Binary: buffers = ImportByteArrayBuffers(cArray); break; + case ArrowTypeId.StringView: + case ArrowTypeId.BinaryView: + buffers = ImportByteArrayViewBuffers(cArray); + break; case ArrowTypeId.List: children = ProcessListChildren(cArray, ((ListType)type).ValueDataType); buffers = ImportListBuffers(cArray); break; + case ArrowTypeId.ListView: + children = ProcessListChildren(cArray, ((ListViewType)type).ValueDataType); + buffers = ImportListViewBuffers(cArray); + break; case ArrowTypeId.FixedSizeList: children = ProcessListChildren(cArray, ((FixedSizeListType)type).ValueDataType); buffers = ImportFixedSizeListBuffers(cArray); @@ -268,6 +276,28 @@ private ArrowBuffer[] ImportByteArrayBuffers(CArrowArray* cArray) return buffers; } + private ArrowBuffer[] ImportByteArrayViewBuffers(CArrowArray* cArray) + { + if (cArray->n_buffers < 3) + { + throw new InvalidOperationException("Byte array views are expected to have at least three buffers"); + } + + int length = checked((int)cArray->length); + int viewsLength = length * 16; + + long* bufferLengths = (long*)cArray->buffers[cArray->n_buffers - 1]; + ArrowBuffer[] buffers = new ArrowBuffer[cArray->n_buffers - 1]; + buffers[0] = ImportValidityBuffer(cArray); + buffers[1] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, viewsLength)); + for (int i = 2; i < buffers.Length; i++) + { + buffers[i] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[i], 0, checked((int)bufferLengths[i - 2]))); + } + + return buffers; + } + private ArrowBuffer[] ImportListBuffers(CArrowArray* cArray) { if (cArray->n_buffers != 2) @@ -285,6 +315,24 @@ private ArrowBuffer[] ImportListBuffers(CArrowArray* cArray) return buffers; } + private ArrowBuffer[] ImportListViewBuffers(CArrowArray* cArray) + { + if (cArray->n_buffers != 3) + { + throw new InvalidOperationException("List view arrays are expected to have exactly three buffers"); + } + + int length = checked((int)cArray->length); + int offsetsLength = length * 4; + + ArrowBuffer[] buffers = new ArrowBuffer[3]; + buffers[0] = ImportValidityBuffer(cArray); + buffers[1] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, offsetsLength)); + buffers[2] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[2], 0, offsetsLength)); + + return buffers; + } + private ArrowBuffer[] ImportFixedSizeListBuffers(CArrowArray* cArray) { if (cArray->n_buffers != 1) diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs index c9b45a8eb2d87..3bb7134af3ba9 100644 --- a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs @@ -167,7 +167,9 @@ private static string GetFormat(IArrowType datatype) return $"d:{decimalType.Precision},{decimalType.Scale},256"; // 
Binary case BinaryType _: return "z"; + case BinaryViewType _: return "vz"; case StringType _: return "u"; + case StringViewType _: return "vu"; case FixedSizeBinaryType binaryType: return $"w:{binaryType.ByteWidth}"; // Date @@ -196,6 +198,7 @@ private static string GetFormat(IArrowType datatype) }; // Nested case ListType _: return "+l"; + case ListViewType _: return "+vl"; case FixedSizeListType fixedListType: return $"+w:{fixedListType.ListSize}"; case StructType _: return "+s"; diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs index 9c81195771bae..f1acc007bcef7 100644 --- a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs @@ -165,7 +165,7 @@ public ArrowType GetAsType() } // Special handling for nested types - if (format == "+l") + if (format == "+l" || format == "+vl") { if (_cSchema->n_children != 1) { @@ -180,7 +180,7 @@ public ArrowType GetAsType() Field childField = childSchema.GetAsField(); - return new ListType(childField); + return format[1] == 'v' ? new ListViewType(childField) : new ListType(childField); } else if (format == "+s") { @@ -303,8 +303,10 @@ public ArrowType GetAsType() "g" => DoubleType.Default, // Binary data "z" => BinaryType.Default, + "vz" => BinaryViewType.Default, //"Z" => new LargeBinaryType() // Not yet implemented "u" => StringType.Default, + "vu" => StringViewType.Default, //"U" => new LargeStringType(), // Not yet implemented // Date and time "tdD" => Date32Type.Default, diff --git a/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs b/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs index 399d9bf5e6bf1..2b6742a3d0cb2 100644 --- a/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs +++ b/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs @@ -23,6 +23,17 @@ internal static class ArrayDataExtensions public static void EnsureBufferCount(this ArrayData data, int count) { if (data.Buffers.Length != count) + { + // TODO: Use localizable string resource + throw new ArgumentException( + $"Buffer count <{data.Buffers.Length}> must be at exactly <{count}>", + nameof(data.Buffers.Length)); + } + } + + public static void EnsureVariadicBufferCount(this ArrayData data, int count) + { + if (data.Buffers.Length < count) { // TODO: Use localizable string resource throw new ArgumentException( diff --git a/csharp/src/Apache.Arrow/Extensions/FlatbufExtensions.cs b/csharp/src/Apache.Arrow/Extensions/FlatbufExtensions.cs index 5f39680b90ebc..b44c02d854077 100644 --- a/csharp/src/Apache.Arrow/Extensions/FlatbufExtensions.cs +++ b/csharp/src/Apache.Arrow/Extensions/FlatbufExtensions.cs @@ -19,25 +19,6 @@ namespace Apache.Arrow { internal static class FlatbufExtensions { - public static bool IsFixedPrimitive(this Flatbuf.Type t) - { - if (t == Flatbuf.Type.Utf8 || t == Flatbuf.Type.Binary) - return false; - return true; - } - - public static bool IsFixedPrimitive(this Types.IArrowType t) - { - return t.TypeId.IsFixedPrimitive(); - } - - public static bool IsFixedPrimitive(this Types.ArrowTypeId t) - { - if (t == Types.ArrowTypeId.String || t == Types.ArrowTypeId.Binary) - return false; - return true; - } - public static Types.IntervalUnit ToArrow(this Flatbuf.IntervalUnit unit) { switch (unit) diff --git a/csharp/src/Apache.Arrow/Flatbuf/BinaryView.cs b/csharp/src/Apache.Arrow/Flatbuf/BinaryView.cs new file mode 100644 index 0000000000000..2f9cca51737f8 --- /dev/null +++ b/csharp/src/Apache.Arrow/Flatbuf/BinaryView.cs @@ -0,0 
+1,47 @@ +// +// automatically generated by the FlatBuffers compiler, do not modify +// + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::System.Collections.Generic; +using global::Google.FlatBuffers; + +/// Logically the same as Binary, but the internal representation uses a view +/// struct that contains the string length and either the string's entire data +/// inline (for small strings) or an inlined prefix, an index of another buffer, +/// and an offset pointing to a slice in that buffer (for non-small strings). +/// +/// Since it uses a variable number of data buffers, each Field with this type +/// must have a corresponding entry in `variadicBufferCounts`. +internal struct BinaryView : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static void ValidateVersion() { FlatBufferConstants.FLATBUFFERS_23_5_9(); } + public static BinaryView GetRootAsBinaryView(ByteBuffer _bb) { return GetRootAsBinaryView(_bb, new BinaryView()); } + public static BinaryView GetRootAsBinaryView(ByteBuffer _bb, BinaryView obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p = new Table(_i, _bb); } + public BinaryView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void StartBinaryView(FlatBufferBuilder builder) { builder.StartTable(0); } + public static Offset EndBinaryView(FlatBufferBuilder builder) { + int o = builder.EndTable(); + return new Offset(o); + } +} + + +static internal class BinaryViewVerify +{ + static public bool Verify(Google.FlatBuffers.Verifier verifier, uint tablePos) + { + return verifier.VerifyTableStart(tablePos) + && verifier.VerifyTableEnd(tablePos); + } +} + +} diff --git a/csharp/src/Apache.Arrow/Flatbuf/Enums/MetadataVersion.cs b/csharp/src/Apache.Arrow/Flatbuf/Enums/MetadataVersion.cs index 1e893e8cb6ffc..13b5315805dc9 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/Enums/MetadataVersion.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/Enums/MetadataVersion.cs @@ -8,21 +8,21 @@ namespace Apache.Arrow.Flatbuf internal enum MetadataVersion : short { /// 0.1.0 (October 2016). - V1 = 0, + V1 = 0, /// 0.2.0 (February 2017). Non-backwards compatible with V1. - V2 = 1, + V2 = 1, /// 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2. - V3 = 2, + V3 = 2, /// >= 0.8.0 (December 2017). Non-backwards compatible with V3. - V4 = 3, - /// >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4 + V4 = 3, + /// >= 1.0.0 (July 2020). Backwards compatible with V4 (V5 readers can read V4 /// metadata and IPC messages). Implementations are recommended to provide a /// V4 compatibility mode with V5 format changes disabled. /// /// Incompatible changes between V4 and V5: /// - Union buffer layout has changed. In V5, Unions don't have a validity /// bitmap buffer. 
- V5 = 4, + V5 = 4, }; diff --git a/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs b/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs index 10f852efb9b96..9c04288648dea 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs @@ -33,6 +33,10 @@ internal enum Type : byte LargeUtf8 = 20, LargeList = 21, RunEndEncoded = 22, + BinaryView = 23, + Utf8View = 24, + ListView = 25, + LargeListView = 26, }; @@ -110,6 +114,18 @@ static public bool Verify(Google.FlatBuffers.Verifier verifier, byte typeId, uin case Type.RunEndEncoded: result = RunEndEncodedVerify.Verify(verifier, tablePos); break; + case Type.BinaryView: + result = BinaryViewVerify.Verify(verifier, tablePos); + break; + case Type.Utf8View: + result = Utf8ViewVerify.Verify(verifier, tablePos); + break; + case Type.ListView: + result = ListViewVerify.Verify(verifier, tablePos); + break; + case Type.LargeListView: + result = LargeListViewVerify.Verify(verifier, tablePos); + break; default: result = true; break; } diff --git a/csharp/src/Apache.Arrow/Flatbuf/Field.cs b/csharp/src/Apache.Arrow/Flatbuf/Field.cs index c5c6c0a165598..efbc6afb06d03 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/Field.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/Field.cs @@ -57,6 +57,10 @@ internal struct Field : IFlatbufferObject public LargeUtf8 TypeAsLargeUtf8() { return Type().Value; } public LargeList TypeAsLargeList() { return Type().Value; } public RunEndEncoded TypeAsRunEndEncoded() { return Type().Value; } + public BinaryView TypeAsBinaryView() { return Type().Value; } + public Utf8View TypeAsUtf8View() { return Type().Value; } + public ListView TypeAsListView() { return Type().Value; } + public LargeListView TypeAsLargeListView() { return Type().Value; } /// Present only if the field is dictionary encoded. public DictionaryEncoding? Dictionary { get { int o = __p.__offset(12); return o != 0 ? (DictionaryEncoding?)(new DictionaryEncoding()).__assign(__p.__indirect(o + __p.bb_pos), __p.bb) : null; } } /// children apply only to nested data types like Struct, List and Union. For diff --git a/csharp/src/Apache.Arrow/Flatbuf/LargeListView.cs b/csharp/src/Apache.Arrow/Flatbuf/LargeListView.cs new file mode 100644 index 0000000000000..685e91333c38c --- /dev/null +++ b/csharp/src/Apache.Arrow/Flatbuf/LargeListView.cs @@ -0,0 +1,42 @@ +// +// automatically generated by the FlatBuffers compiler, do not modify +// + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::System.Collections.Generic; +using global::Google.FlatBuffers; + +/// Same as ListView, but with 64-bit offsets and sizes, allowing to represent +/// extremely large data values. 
+internal struct LargeListView : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static void ValidateVersion() { FlatBufferConstants.FLATBUFFERS_23_5_9(); } + public static LargeListView GetRootAsLargeListView(ByteBuffer _bb) { return GetRootAsLargeListView(_bb, new LargeListView()); } + public static LargeListView GetRootAsLargeListView(ByteBuffer _bb, LargeListView obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p = new Table(_i, _bb); } + public LargeListView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void StartLargeListView(FlatBufferBuilder builder) { builder.StartTable(0); } + public static Offset EndLargeListView(FlatBufferBuilder builder) { + int o = builder.EndTable(); + return new Offset(o); + } +} + + +static internal class LargeListViewVerify +{ + static public bool Verify(Google.FlatBuffers.Verifier verifier, uint tablePos) + { + return verifier.VerifyTableStart(tablePos) + && verifier.VerifyTableEnd(tablePos); + } +} + +} diff --git a/csharp/src/Apache.Arrow/Flatbuf/ListView.cs b/csharp/src/Apache.Arrow/Flatbuf/ListView.cs new file mode 100644 index 0000000000000..d2e54e428524b --- /dev/null +++ b/csharp/src/Apache.Arrow/Flatbuf/ListView.cs @@ -0,0 +1,43 @@ +// +// automatically generated by the FlatBuffers compiler, do not modify +// + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::System.Collections.Generic; +using global::Google.FlatBuffers; + +/// Represents the same logical types that List can, but contains offsets and +/// sizes allowing for writes in any order and sharing of child values among +/// list values. +internal struct ListView : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static void ValidateVersion() { FlatBufferConstants.FLATBUFFERS_23_5_9(); } + public static ListView GetRootAsListView(ByteBuffer _bb) { return GetRootAsListView(_bb, new ListView()); } + public static ListView GetRootAsListView(ByteBuffer _bb, ListView obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p = new Table(_i, _bb); } + public ListView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void StartListView(FlatBufferBuilder builder) { builder.StartTable(0); } + public static Offset EndListView(FlatBufferBuilder builder) { + int o = builder.EndTable(); + return new Offset(o); + } +} + + +static internal class ListViewVerify +{ + static public bool Verify(Google.FlatBuffers.Verifier verifier, uint tablePos) + { + return verifier.VerifyTableStart(tablePos) + && verifier.VerifyTableEnd(tablePos); + } +} + +} diff --git a/csharp/src/Apache.Arrow/Flatbuf/RecordBatch.cs b/csharp/src/Apache.Arrow/Flatbuf/RecordBatch.cs index 9ab9715165ddc..2df8716bc1655 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/RecordBatch.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/RecordBatch.cs @@ -38,27 +38,57 @@ internal struct RecordBatch : IFlatbufferObject public int BuffersLength { get { int o = __p.__offset(8); return o != 0 ? __p.__vector_len(o) : 0; } } /// Optional compression of the message body public BodyCompression? Compression { get { int o = __p.__offset(10); return o != 0 ? 
(BodyCompression?)(new BodyCompression()).__assign(__p.__indirect(o + __p.bb_pos), __p.bb) : null; } } + /// Some types such as Utf8View are represented using a variable number of buffers. + /// For each such Field in the pre-ordered flattened logical schema, there will be + /// an entry in variadicBufferCounts to indicate the number of number of variadic + /// buffers which belong to that Field in the current RecordBatch. + /// + /// For example, the schema + /// col1: Struct + /// col2: Utf8View + /// contains two Fields with variadic buffers so variadicBufferCounts will have + /// two entries, the first counting the variadic buffers of `col1.beta` and the + /// second counting `col2`'s. + /// + /// This field may be omitted if and only if the schema contains no Fields with + /// a variable number of buffers, such as BinaryView and Utf8View. + public long VariadicBufferCounts(int j) { int o = __p.__offset(12); return o != 0 ? __p.bb.GetLong(__p.__vector(o) + j * 8) : (long)0; } + public int VariadicBufferCountsLength { get { int o = __p.__offset(12); return o != 0 ? __p.__vector_len(o) : 0; } } +#if ENABLE_SPAN_T + public Span GetVariadicCountsBytes() { return __p.__vector_as_span(12, 8); } +#else + public ArraySegment? GetVariadicCountsBytes() { return __p.__vector_as_arraysegment(12); } +#endif + public long[] GetVariadicCountsArray() { return __p.__vector_as_array(12); } public static Offset CreateRecordBatch(FlatBufferBuilder builder, long length = 0, VectorOffset nodesOffset = default(VectorOffset), VectorOffset buffersOffset = default(VectorOffset), - Offset compressionOffset = default(Offset)) { - builder.StartTable(4); + Offset compressionOffset = default(Offset), + VectorOffset variadicCountsOffset = default(VectorOffset)) { + builder.StartTable(5); RecordBatch.AddLength(builder, length); + RecordBatch.AddVariadicCounts(builder, variadicCountsOffset); RecordBatch.AddCompression(builder, compressionOffset); RecordBatch.AddBuffers(builder, buffersOffset); RecordBatch.AddNodes(builder, nodesOffset); return RecordBatch.EndRecordBatch(builder); } - public static void StartRecordBatch(FlatBufferBuilder builder) { builder.StartTable(4); } + public static void StartRecordBatch(FlatBufferBuilder builder) { builder.StartTable(5); } public static void AddLength(FlatBufferBuilder builder, long length) { builder.AddLong(0, length, 0); } public static void AddNodes(FlatBufferBuilder builder, VectorOffset nodesOffset) { builder.AddOffset(1, nodesOffset.Value, 0); } public static void StartNodesVector(FlatBufferBuilder builder, int numElems) { builder.StartVector(16, numElems, 8); } public static void AddBuffers(FlatBufferBuilder builder, VectorOffset buffersOffset) { builder.AddOffset(2, buffersOffset.Value, 0); } public static void StartBuffersVector(FlatBufferBuilder builder, int numElems) { builder.StartVector(16, numElems, 8); } public static void AddCompression(FlatBufferBuilder builder, Offset compressionOffset) { builder.AddOffset(3, compressionOffset.Value, 0); } + public static void AddVariadicCounts(FlatBufferBuilder builder, VectorOffset variadicCountsOffset) { builder.AddOffset(4, variadicCountsOffset.Value, 0); } + public static VectorOffset CreateVariadicCountsVector(FlatBufferBuilder builder, long[] data) { builder.StartVector(8, data.Length, 8); for (int i = data.Length - 1; i >= 0; i--) builder.AddLong(data[i]); return builder.EndVector(); } + public static VectorOffset CreateVariadicCountsVectorBlock(FlatBufferBuilder builder, long[] data) { builder.StartVector(8, 
data.Length, 8); builder.Add(data); return builder.EndVector(); } + public static VectorOffset CreateVariadicCountsVectorBlock(FlatBufferBuilder builder, ArraySegment data) { builder.StartVector(8, data.Count, 8); builder.Add(data); return builder.EndVector(); } + public static VectorOffset CreateVariadicCountsVectorBlock(FlatBufferBuilder builder, IntPtr dataPtr, int sizeInBytes) { builder.StartVector(1, sizeInBytes, 1); builder.Add(dataPtr, sizeInBytes); return builder.EndVector(); } + public static void StartVariadicCountsVector(FlatBufferBuilder builder, int numElems) { builder.StartVector(8, numElems, 8); } public static Offset EndRecordBatch(FlatBufferBuilder builder) { int o = builder.EndTable(); return new Offset(o); @@ -75,6 +105,7 @@ static public bool Verify(Google.FlatBuffers.Verifier verifier, uint tablePos) && verifier.VerifyVectorOfData(tablePos, 6 /*Nodes*/, 16 /*FieldNode*/, false) && verifier.VerifyVectorOfData(tablePos, 8 /*Buffers*/, 16 /*Buffer*/, false) && verifier.VerifyTable(tablePos, 10 /*Compression*/, BodyCompressionVerify.Verify, false) + && verifier.VerifyVectorOfData(tablePos, 12 /*VariadicCounts*/, 8 /*long*/, false) && verifier.VerifyTableEnd(tablePos); } } diff --git a/csharp/src/Apache.Arrow/Flatbuf/SparseTensor.cs b/csharp/src/Apache.Arrow/Flatbuf/SparseTensor.cs index 3f9e1de7c00a9..099950fafe4ee 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/SparseTensor.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/SparseTensor.cs @@ -47,6 +47,10 @@ internal struct SparseTensor : IFlatbufferObject public LargeUtf8 TypeAsLargeUtf8() { return Type().Value; } public LargeList TypeAsLargeList() { return Type().Value; } public RunEndEncoded TypeAsRunEndEncoded() { return Type().Value; } + public BinaryView TypeAsBinaryView() { return Type().Value; } + public Utf8View TypeAsUtf8View() { return Type().Value; } + public ListView TypeAsListView() { return Type().Value; } + public LargeListView TypeAsLargeListView() { return Type().Value; } /// The dimensions of the tensor, optionally named. public TensorDim? Shape(int j) { int o = __p.__offset(8); return o != 0 ? (TensorDim?)(new TensorDim()).__assign(__p.__indirect(__p.__vector(o) + j * 4), __p.bb) : null; } public int ShapeLength { get { int o = __p.__offset(8); return o != 0 ? __p.__vector_len(o) : 0; } } diff --git a/csharp/src/Apache.Arrow/Flatbuf/Tensor.cs b/csharp/src/Apache.Arrow/Flatbuf/Tensor.cs index f8c213768a3fc..eb39257d861ca 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/Tensor.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/Tensor.cs @@ -46,6 +46,10 @@ internal struct Tensor : IFlatbufferObject public LargeUtf8 TypeAsLargeUtf8() { return Type().Value; } public LargeList TypeAsLargeList() { return Type().Value; } public RunEndEncoded TypeAsRunEndEncoded() { return Type().Value; } + public BinaryView TypeAsBinaryView() { return Type().Value; } + public Utf8View TypeAsUtf8View() { return Type().Value; } + public ListView TypeAsListView() { return Type().Value; } + public LargeListView TypeAsLargeListView() { return Type().Value; } /// The dimensions of the tensor, optionally named public TensorDim? Shape(int j) { int o = __p.__offset(8); return o != 0 ? (TensorDim?)(new TensorDim()).__assign(__p.__indirect(__p.__vector(o) + j * 4), __p.bb) : null; } public int ShapeLength { get { int o = __p.__offset(8); return o != 0 ? 
__p.__vector_len(o) : 0; } } diff --git a/csharp/src/Apache.Arrow/Flatbuf/Utf8View.cs b/csharp/src/Apache.Arrow/Flatbuf/Utf8View.cs new file mode 100644 index 0000000000000..e85c5374a9acc --- /dev/null +++ b/csharp/src/Apache.Arrow/Flatbuf/Utf8View.cs @@ -0,0 +1,47 @@ +// +// automatically generated by the FlatBuffers compiler, do not modify +// + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::System.Collections.Generic; +using global::Google.FlatBuffers; + +/// Logically the same as Utf8, but the internal representation uses a view +/// struct that contains the string length and either the string's entire data +/// inline (for small strings) or an inlined prefix, an index of another buffer, +/// and an offset pointing to a slice in that buffer (for non-small strings). +/// +/// Since it uses a variable number of data buffers, each Field with this type +/// must have a corresponding entry in `variadicBufferCounts`. +internal struct Utf8View : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static void ValidateVersion() { FlatBufferConstants.FLATBUFFERS_23_5_9(); } + public static Utf8View GetRootAsUtf8View(ByteBuffer _bb) { return GetRootAsUtf8View(_bb, new Utf8View()); } + public static Utf8View GetRootAsUtf8View(ByteBuffer _bb, Utf8View obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p = new Table(_i, _bb); } + public Utf8View __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void StartUtf8View(FlatBufferBuilder builder) { builder.StartTable(0); } + public static Offset EndUtf8View(FlatBufferBuilder builder) { + int o = builder.EndTable(); + return new Offset(o); + } +} + + +static internal class Utf8ViewVerify +{ + static public bool Verify(Google.FlatBuffers.Verifier verifier, uint tablePos) + { + return verifier.VerifyTableStart(tablePos) + && verifier.VerifyTableEnd(tablePos); + } +} + +} diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs index d3115da52cc6c..eb7349a570786 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs @@ -191,9 +191,7 @@ private List BuildArrays( Field field = schema.GetFieldByIndex(schemaFieldIndex++); Flatbuf.FieldNode fieldNode = recordBatchEnumerator.CurrentNode; - ArrayData arrayData = field.DataType.IsFixedPrimitive() - ? LoadPrimitiveField(version, ref recordBatchEnumerator, field, in fieldNode, messageBuffer, bufferCreator) - : LoadVariableField(version, ref recordBatchEnumerator, field, in fieldNode, messageBuffer, bufferCreator); + ArrayData arrayData = LoadField(version, ref recordBatchEnumerator, field, in fieldNode, messageBuffer, bufferCreator); arrays.Add(ArrowArrayFactory.BuildArray(arrayData)); } while (recordBatchEnumerator.MoveNextNode()); @@ -229,7 +227,7 @@ private IBufferCreator GetBufferCreator(BodyCompression? 
compression) return new DecompressingBufferCreator(decompressor, _allocator); } - private ArrayData LoadPrimitiveField( + private ArrayData LoadField( MetadataVersion version, ref RecordBatchEnumerator recordBatchEnumerator, Field field, @@ -276,6 +274,16 @@ private ArrayData LoadPrimitiveField( case ArrowTypeId.FixedSizeList: buffers = 1; break; + case ArrowTypeId.String: + case ArrowTypeId.Binary: + case ArrowTypeId.ListView: + buffers = 3; + break; + case ArrowTypeId.StringView: + case ArrowTypeId.BinaryView: + buffers = checked((int)(2 + recordBatchEnumerator.CurrentVariadicCount)); + recordBatchEnumerator.MoveNextVariadicCount(); + break; default: buffers = 2; break; @@ -300,54 +308,6 @@ private ArrayData LoadPrimitiveField( return new ArrayData(field.DataType, fieldLength, fieldNullCount, 0, arrowBuff, children, dictionary?.Data); } - private ArrayData LoadVariableField( - MetadataVersion version, - ref RecordBatchEnumerator recordBatchEnumerator, - Field field, - in Flatbuf.FieldNode fieldNode, - ByteBuffer bodyData, - IBufferCreator bufferCreator) - { - - ArrowBuffer nullArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer, bufferCreator); - if (!recordBatchEnumerator.MoveNextBuffer()) - { - throw new Exception("Unable to move to the next buffer."); - } - ArrowBuffer offsetArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer, bufferCreator); - if (!recordBatchEnumerator.MoveNextBuffer()) - { - throw new Exception("Unable to move to the next buffer."); - } - ArrowBuffer valueArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer, bufferCreator); - recordBatchEnumerator.MoveNextBuffer(); - - int fieldLength = (int)fieldNode.Length; - int fieldNullCount = (int)fieldNode.NullCount; - - if (fieldLength < 0) - { - throw new InvalidDataException("Field length must be >= 0"); // TODO: Localize exception message - } - - if (fieldNullCount < 0) - { - throw new InvalidDataException("Null count length must be >= 0"); //TODO: Localize exception message - } - - ArrowBuffer[] arrowBuff = new[] { nullArrowBuffer, offsetArrowBuffer, valueArrowBuffer }; - ArrayData[] children = GetChildren(version, ref recordBatchEnumerator, field, bodyData, bufferCreator); - - IArrowArray dictionary = null; - if (field.DataType.TypeId == ArrowTypeId.Dictionary) - { - long id = DictionaryMemo.GetId(field); - dictionary = DictionaryMemo.GetDictionary(id); - } - - return new ArrayData(field.DataType, fieldLength, fieldNullCount, 0, arrowBuff, children, dictionary?.Data); - } - private ArrayData[] GetChildren( MetadataVersion version, ref RecordBatchEnumerator recordBatchEnumerator, @@ -365,11 +325,7 @@ private ArrayData[] GetChildren( Flatbuf.FieldNode childFieldNode = recordBatchEnumerator.CurrentNode; Field childField = type.Fields[index]; - ArrayData child = childField.DataType.IsFixedPrimitive() - ? 
LoadPrimitiveField(version, ref recordBatchEnumerator, childField, in childFieldNode, bodyData, bufferCreator) - : LoadVariableField(version, ref recordBatchEnumerator, childField, in childFieldNode, bodyData, bufferCreator); - - children[index] = child; + children[index] = LoadField(version, ref recordBatchEnumerator, childField, in childFieldNode, bodyData, bufferCreator); } return children; } @@ -394,11 +350,14 @@ internal struct RecordBatchEnumerator private Flatbuf.RecordBatch RecordBatch { get; } internal int CurrentBufferIndex { get; private set; } internal int CurrentNodeIndex { get; private set; } + internal int CurrentVariadicCountIndex { get; private set; } internal Flatbuf.Buffer CurrentBuffer => RecordBatch.Buffers(CurrentBufferIndex).GetValueOrDefault(); internal Flatbuf.FieldNode CurrentNode => RecordBatch.Nodes(CurrentNodeIndex).GetValueOrDefault(); + internal long CurrentVariadicCount => RecordBatch.VariadicBufferCounts(CurrentVariadicCountIndex); + internal bool MoveNextBuffer() { return ++CurrentBufferIndex < RecordBatch.BuffersLength; @@ -409,11 +368,17 @@ internal bool MoveNextNode() return ++CurrentNodeIndex < RecordBatch.NodesLength; } + internal bool MoveNextVariadicCount() + { + return ++CurrentVariadicCountIndex < RecordBatch.VariadicBufferCountsLength; + } + internal RecordBatchEnumerator(in Flatbuf.RecordBatch recordBatch) { RecordBatch = recordBatch; CurrentBufferIndex = 0; CurrentNodeIndex = 0; + CurrentVariadicCountIndex = 0; } } } diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index 5f490019b2133..07d1dcfdb171d 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -54,9 +54,12 @@ internal class ArrowRecordBatchFlatBufferBuilder : IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, @@ -81,6 +84,7 @@ public Buffer(ArrowBuffer buffer, int offset) public IReadOnlyList Buffers => _buffers; + public List VariadicCounts { get; private set; } public int TotalLength { get; private set; } public ArrowRecordBatchFlatBufferBuilder() @@ -121,6 +125,15 @@ public void Visit(ListArray array) array.Values.Accept(this); } + public void Visit(ListViewArray array) + { + _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); + _buffers.Add(CreateBuffer(array.ValueOffsetsBuffer)); + _buffers.Add(CreateBuffer(array.SizesBuffer)); + + array.Values.Accept(this); + } + public void Visit(FixedSizeListArray array) { _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); @@ -130,6 +143,8 @@ public void Visit(FixedSizeListArray array) public void Visit(StringArray array) => Visit(array as BinaryArray); + public void Visit(StringViewArray array) => Visit(array as BinaryViewArray); + public void Visit(BinaryArray array) { _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); @@ -137,6 +152,18 @@ public void Visit(BinaryArray array) _buffers.Add(CreateBuffer(array.ValueBuffer)); } + public void Visit(BinaryViewArray array) + { + _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); + _buffers.Add(CreateBuffer(array.ViewsBuffer)); + for (int i = 0; i < array.DataBufferCount; i++) + { + _buffers.Add(CreateBuffer(array.DataBuffer(i))); + } + VariadicCounts = VariadicCounts ?? 
new List(); + VariadicCounts.Add(array.DataBufferCount); + } + public void Visit(FixedSizeBinaryArray array) { _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); @@ -328,7 +355,7 @@ private protected void WriteRecordBatchInternal(RecordBatch recordBatch) HasWrittenDictionaryBatch = true; } - (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset) = + (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset, VectorOffset variadicCountsOffset) = PreparingWritingRecordBatch(recordBatch); VectorOffset buffersVectorOffset = Builder.EndVector(); @@ -339,7 +366,9 @@ private protected void WriteRecordBatchInternal(RecordBatch recordBatch) Offset recordBatchOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, recordBatch.Length, fieldNodesVectorOffset, - buffersVectorOffset); + buffersVectorOffset, + default, + variadicCountsOffset); long metadataLength = WriteMessage(Flatbuf.MessageHeader.RecordBatch, recordBatchOffset, recordBatchBuilder.TotalLength); @@ -367,7 +396,7 @@ private protected async Task WriteRecordBatchInternalAsync(RecordBatch recordBat HasWrittenDictionaryBatch = true; } - (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset) = + (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset, VectorOffset variadicCountsOffset) = PreparingWritingRecordBatch(recordBatch); VectorOffset buffersVectorOffset = Builder.EndVector(); @@ -378,7 +407,9 @@ private protected async Task WriteRecordBatchInternalAsync(RecordBatch recordBat Offset recordBatchOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, recordBatch.Length, fieldNodesVectorOffset, - buffersVectorOffset); + buffersVectorOffset, + default, + variadicCountsOffset); long metadataLength = await WriteMessageAsync(Flatbuf.MessageHeader.RecordBatch, recordBatchOffset, recordBatchBuilder.TotalLength, @@ -451,12 +482,12 @@ private async ValueTask WriteBufferDataAsync(IReadOnlyList PreparingWritingRecordBatch(RecordBatch recordBatch) + private Tuple PreparingWritingRecordBatch(RecordBatch recordBatch) { return PreparingWritingRecordBatch(recordBatch.Schema.FieldsList, recordBatch.ArrayList); } - private Tuple PreparingWritingRecordBatch(IReadOnlyList fields, IReadOnlyList arrays) + private Tuple PreparingWritingRecordBatch(IReadOnlyList fields, IReadOnlyList arrays) { Builder.Clear(); @@ -483,6 +514,12 @@ private Tuple PreparingWritingR fieldArray.Accept(recordBatchBuilder); } + VectorOffset variadicCountOffset = default; + if (recordBatchBuilder.VariadicCounts != null) + { + variadicCountOffset = Flatbuf.RecordBatch.CreateVariadicCountsVectorBlock(Builder, recordBatchBuilder.VariadicCounts.ToArray()); + } + IReadOnlyList buffers = recordBatchBuilder.Buffers; Flatbuf.RecordBatch.StartBuffersVector(Builder, buffers.Count); @@ -494,7 +531,7 @@ private Tuple PreparingWritingR buffers[i].Offset, buffers[i].DataBuffer.Length); } - return Tuple.Create(recordBatchBuilder, fieldNodesVectorOffset); + return Tuple.Create(recordBatchBuilder, fieldNodesVectorOffset, variadicCountOffset); } private protected virtual void StartingWritingDictionary() @@ -561,7 +598,7 @@ private protected async Task WriteDictionaryAsync(long id, IArrowType valueType, var arrays = new List { dictionary }; - (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset) = + (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset, VectorOffset variadicCountsOffset) = 
PreparingWritingRecordBatch(fields, arrays); VectorOffset buffersVectorOffset = Builder.EndVector(); @@ -569,7 +606,9 @@ private protected async Task WriteDictionaryAsync(long id, IArrowType valueType, // Serialize record batch Offset recordBatchOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, dictionary.Length, fieldNodesVectorOffset, - buffersVectorOffset); + buffersVectorOffset, + default, + variadicCountsOffset); // TODO: Support delta. Offset dictionaryBatchOffset = Flatbuf.DictionaryBatch.CreateDictionaryBatch(Builder, id, recordBatchOffset, false); diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs index 84ff4f9cc7202..473e18968f8cb 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs @@ -50,9 +50,13 @@ class TypeVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, +#if NET5_0_OR_GREATER + IArrowTypeVisitor, +#endif IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -60,8 +64,10 @@ class TypeVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -106,6 +112,14 @@ public void Visit(BinaryType type) Flatbuf.Binary.EndBinary(Builder)); } + public void Visit(BinaryViewType type) + { + Flatbuf.BinaryView.StartBinaryView(Builder); + Offset offset = Flatbuf.BinaryView.EndBinaryView(Builder); + Result = FieldType.Build( + Flatbuf.Type.BinaryView, offset); + } + public void Visit(ListType type) { Flatbuf.List.StartList(Builder); @@ -114,6 +128,14 @@ public void Visit(ListType type) Flatbuf.List.EndList(Builder)); } + public void Visit(ListViewType type) + { + Flatbuf.ListView.StartListView(Builder); + Result = FieldType.Build( + Flatbuf.Type.ListView, + Flatbuf.ListView.EndListView(Builder)); + } + public void Visit(FixedSizeListType type) { Result = FieldType.Build( @@ -136,6 +158,14 @@ public void Visit(StringType type) Flatbuf.Type.Utf8, offset); } + public void Visit(StringViewType type) + { + Flatbuf.Utf8View.StartUtf8View(Builder); + Offset offset = Flatbuf.Utf8View.EndUtf8View(Builder); + Result = FieldType.Build( + Flatbuf.Type.Utf8View, offset); + } + public void Visit(TimestampType type) { StringOffset timezoneStringOffset = default; @@ -169,6 +199,15 @@ public void Visit(Time32Type type) Flatbuf.Time.CreateTime(Builder, ToFlatBuffer(type.Unit))); } +#if NET5_0_OR_GREATER + public void Visit(HalfFloatType type) + { + Result = FieldType.Build( + Flatbuf.Type.FloatingPoint, + Flatbuf.FloatingPoint.CreateFloatingPoint(Builder, Precision.HALF)); + } +#endif + public void Visit(FloatType type) { Result = FieldType.Build( diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs index 633554fc53261..0e6f330aef091 100644 --- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs +++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs @@ -184,17 +184,27 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c return Types.IntervalType.FromIntervalUnit(intervalMetadata.Unit.ToArrow()); case Flatbuf.Type.Utf8: return Types.StringType.Default; + case Flatbuf.Type.Utf8View: + return Types.StringViewType.Default; case Flatbuf.Type.FixedSizeBinary: Flatbuf.FixedSizeBinary fixedSizeBinaryMetadata = 
field.Type().Value; return new Types.FixedSizeBinaryType(fixedSizeBinaryMetadata.ByteWidth); case Flatbuf.Type.Binary: return Types.BinaryType.Default; + case Flatbuf.Type.BinaryView: + return Types.BinaryViewType.Default; case Flatbuf.Type.List: if (childFields == null || childFields.Length != 1) { throw new InvalidDataException($"List type must have exactly one child."); } return new Types.ListType(childFields[0]); + case Flatbuf.Type.ListView: + if (childFields == null || childFields.Length != 1) + { + throw new InvalidDataException($"List view type must have exactly one child."); + } + return new Types.ListViewType(childFields[0]); case Flatbuf.Type.FixedSizeList: if (childFields == null || childFields.Length != 1) { diff --git a/csharp/src/Apache.Arrow/Scalars/BinaryView.cs b/csharp/src/Apache.Arrow/Scalars/BinaryView.cs new file mode 100644 index 0000000000000..eaba89c7a3a8e --- /dev/null +++ b/csharp/src/Apache.Arrow/Scalars/BinaryView.cs @@ -0,0 +1,111 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace Apache.Arrow.Scalars +{ + [StructLayout(LayoutKind.Explicit)] + public unsafe struct BinaryView : IEquatable + { + public const int PrefixLength = 4; + public const int MaxInlineLength = 12; + + [FieldOffset(0)] + public readonly int Length; + + [FieldOffset(4)] + internal readonly int _prefix; + + [FieldOffset(8)] + internal readonly int _bufferIndex; + + [FieldOffset(12)] + internal readonly int _bufferOffset; + + [FieldOffset(4)] + internal fixed byte _inline[MaxInlineLength]; + + public unsafe BinaryView(ReadOnlySpan inline) : this() + { + if (inline.Length > MaxInlineLength) + { + throw new ArgumentException("invalid inline data length", nameof(inline)); + } + + Length = inline.Length; + fixed (byte* dest = _inline) + fixed (byte* src = inline) + { + Buffer.MemoryCopy(src, dest, MaxInlineLength, inline.Length); + } + } + + public BinaryView(int length, ReadOnlySpan prefix, int bufferIndex, int bufferOffset) + { + if (length < MaxInlineLength) + { + throw new ArgumentException("invalid length", nameof(length)); + } + if (prefix.Length != PrefixLength) + { + throw new ArgumentException("invalid prefix length", nameof(prefix)); + } + + Length = length; + _bufferIndex = bufferIndex; + _bufferOffset = bufferOffset; + _prefix = prefix.CastTo()[0]; + } + + private BinaryView(int length, int prefix, int bufferIndex, int offset) + { + Length = length; + _prefix = prefix; + _bufferIndex = bufferIndex; + _bufferOffset = offset; + } + + public bool IsInline => Length <= MaxInlineLength; + +#if NET5_0_OR_GREATER + public ReadOnlySpan Bytes => MemoryMarshal.CreateReadOnlySpan(ref Unsafe.AsRef(_inline[0]), IsInline ? 
Length : PrefixLength); +#else + public unsafe ReadOnlySpan Bytes => new ReadOnlySpan(Unsafe.AsPointer(ref _inline[0]), IsInline ? Length : PrefixLength); +#endif + + public int BufferIndex => IsInline ? -1 : _bufferIndex; + + public int BufferOffset => IsInline ? -1 : _bufferOffset; + + public override int GetHashCode() => Length ^ _prefix ^ _bufferIndex ^ _bufferOffset; + + public override bool Equals(object obj) + { + BinaryView? other = obj as BinaryView?; + return other != null && Equals(other.Value); + } + + public bool Equals(BinaryView other) => + Length == other.Length && _prefix == other._prefix && _bufferIndex == other._bufferIndex && _bufferOffset == other._bufferOffset; + + internal BinaryView AdjustBufferIndex(int bufferOffset) + { + return new BinaryView(Length, _prefix, _bufferIndex + bufferOffset, _bufferOffset); + } + } +} diff --git a/csharp/src/Apache.Arrow/Types/BinaryViewType.cs b/csharp/src/Apache.Arrow/Types/BinaryViewType.cs new file mode 100644 index 0000000000000..f5cfc034dc967 --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/BinaryViewType.cs @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +namespace Apache.Arrow.Types +{ + public class BinaryViewType: ArrowType + { + public static readonly BinaryViewType Default = new BinaryViewType(); + + public override ArrowTypeId TypeId => ArrowTypeId.BinaryView; + public override string Name => "binaryview"; + + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); + } +} diff --git a/csharp/src/Apache.Arrow/Types/IArrowType.cs b/csharp/src/Apache.Arrow/Types/IArrowType.cs index 5e107813be828..cf520391fe1e6 100644 --- a/csharp/src/Apache.Arrow/Types/IArrowType.cs +++ b/csharp/src/Apache.Arrow/Types/IArrowType.cs @@ -50,6 +50,9 @@ public enum ArrowTypeId FixedSizeList, Duration, RecordBatch, + BinaryView, + StringView, + ListView, } public interface IArrowType diff --git a/csharp/src/Apache.Arrow/Types/ListViewType.cs b/csharp/src/Apache.Arrow/Types/ListViewType.cs new file mode 100644 index 0000000000000..ecf745723c4ae --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/ListViewType.cs @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +namespace Apache.Arrow.Types +{ + public sealed class ListViewType : NestedType + { + public override ArrowTypeId TypeId => ArrowTypeId.ListView; + public override string Name => "listview"; + + public Field ValueField => Fields[0]; + + public IArrowType ValueDataType => Fields[0].DataType; + + public ListViewType(Field valueField) + : base(valueField) { } + + public ListViewType(IArrowType valueDataType) + : this(new Field("item", valueDataType, true)) { } + + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); + } +} diff --git a/csharp/src/Apache.Arrow/Types/StringViewType.cs b/csharp/src/Apache.Arrow/Types/StringViewType.cs new file mode 100644 index 0000000000000..0c539a56b03b5 --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/StringViewType.cs @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
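`ListViewType` above mirrors `ListType`, but per the ListView format comments earlier in this patch the array carries separate offsets and sizes buffers, so list slots can be written in any order and can share child values. A minimal sketch of that addressing rule, using plain arrays rather than the Arrow buffer classes:

```csharp
using System;

static class ListViewSemanticsDemo
{
    static void Main()
    {
        // Child values shared by all list slots.
        long[] values = { 10, 20, 30, 40, 50 };

        // Unlike List, offsets need not be monotonic and ranges may overlap,
        // so slot 0 and slot 2 below share the child value at index 3.
        int[] offsets = { 3, 0, 0 };
        int[] sizes   = { 2, 0, 4 };

        for (int i = 0; i < offsets.Length; i++)
        {
            // List slot i is values[offsets[i] .. offsets[i] + sizes[i]).
            var slice = new ArraySegment<long>(values, offsets[i], sizes[i]);
            Console.WriteLine($"list[{i}] = [{string.Join(", ", slice)}]");
        }
        // Prints: list[0] = [40, 50], list[1] = [], list[2] = [10, 20, 30, 40]
    }
}
```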
+ + +namespace Apache.Arrow.Types +{ + public sealed class StringViewType : ArrowType + { + public static StringViewType Default = new StringViewType(); + + public override ArrowTypeId TypeId => ArrowTypeId.StringView; + public override string Name => "utf8view"; + + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); + } +} diff --git a/csharp/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs b/csharp/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs index c791c9969356a..f35c2a5d78d79 100644 --- a/csharp/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs +++ b/csharp/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs @@ -38,7 +38,7 @@ public class ArrowWriterBenchmark [GlobalSetup] public void GlobalSetup() { - _batch = TestData.CreateSampleRecordBatch(BatchLength, ColumnSetCount, false); + _batch = TestData.CreateSampleRecordBatch(BatchLength, ColumnSetCount); _memoryStream = new MemoryStream(); } diff --git a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs index f3fe73588a7bb..31a5676f01315 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs +++ b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs @@ -21,6 +21,7 @@ using System.Numerics; using System.Text; using System.Text.Json; +using System.Text.Json.Nodes; using System.Text.Json.Serialization; using System.Threading.Tasks; using Apache.Arrow.Arrays; @@ -175,7 +176,9 @@ private static IArrowType ToArrowType(JsonArrowType type, Field[] children) "floatingpoint" => ToFloatingPointArrowType(type), "decimal" => ToDecimalArrowType(type), "binary" => BinaryType.Default, + "binaryview" => BinaryViewType.Default, "utf8" => StringType.Default, + "utf8view" => StringViewType.Default, "fixedsizebinary" => new FixedSizeBinaryType(type.ByteWidth), "date" => ToDateArrowType(type), "time" => ToTimeArrowType(type), @@ -184,6 +187,7 @@ private static IArrowType ToArrowType(JsonArrowType type, Field[] children) "interval_mdn" => ToIntervalArrowType(type), "timestamp" => ToTimestampArrowType(type), "list" => ToListArrowType(type, children), + "listview" => ToListViewArrowType(type, children), "fixedsizelist" => ToFixedSizeListArrowType(type, children), "struct" => ToStructArrowType(type, children), "union" => ToUnionArrowType(type, children), @@ -294,6 +298,11 @@ private static IArrowType ToListArrowType(JsonArrowType type, Field[] children) return new ListType(children[0]); } + private static IArrowType ToListViewArrowType(JsonArrowType type, Field[] children) + { + return new ListViewType(children[0]); + } + private static IArrowType ToFixedSizeListArrowType(JsonArrowType type, Field[] children) { return new FixedSizeListType(children[0], type.ListSize); @@ -451,9 +460,12 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -652,6 +664,38 @@ public void Visit(StringType type) Array = new StringArray(JsonFieldData.Count, offsetBuffer, valueBuffer, validityBuffer, nullCount); } + public void Visit(StringViewType type) + { + ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); + + // ArrowBuffer viewsBuffer = GetViewsBuffer(); + ArrowBuffer viewsBuffer = ArrowBuffer.Empty; + if (JsonFieldData.Views != null) + { + ArrowBuffer.Builder viewBuilder = new ArrowBuffer.Builder(JsonFieldData.Views.Count); + foreach 
(JsonView jsonView in JsonFieldData.Views) + { + BinaryView view = (jsonView.BufferIndex == null) ? + new BinaryView(Encoding.UTF8.GetBytes(jsonView.Inlined)) : + new BinaryView(jsonView.Size, Convert.FromHexString(jsonView.PrefixHex), jsonView.BufferIndex.Value, jsonView.Offset.Value); + viewBuilder.Append(view); + } + viewsBuffer = viewBuilder.Build(); + } + + int bufferCount = JsonFieldData.VariadicDataBuffers?.Count ?? 0; + ArrowBuffer[] buffers = new ArrowBuffer[2 + bufferCount]; + buffers[0] = validityBuffer; + buffers[1] = viewsBuffer; + for (int i = 0; i < bufferCount; i++) + { + buffers[i + 2] = new ArrowBuffer(Convert.FromHexString(JsonFieldData.VariadicDataBuffers[i])).Clone(); + } + + ArrayData arrayData = new ArrayData(type, JsonFieldData.Count, nullCount, 0, buffers); + Array = new StringViewArray(arrayData); + } + public void Visit(BinaryType type) { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); @@ -671,6 +715,38 @@ public void Visit(BinaryType type) Array = new BinaryArray(arrayData); } + public void Visit(BinaryViewType type) + { + ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); + + // ArrowBuffer viewsBuffer = GetViewsBuffer(); + ArrowBuffer viewsBuffer = ArrowBuffer.Empty; + if (JsonFieldData.Views != null) + { + ArrowBuffer.Builder viewBuilder = new ArrowBuffer.Builder(JsonFieldData.Views.Count); + foreach (JsonView jsonView in JsonFieldData.Views) + { + BinaryView view = (jsonView.BufferIndex == null) ? + new BinaryView(Convert.FromHexString(jsonView.Inlined)) : + new BinaryView(jsonView.Size, Convert.FromHexString(jsonView.PrefixHex), jsonView.BufferIndex.Value, jsonView.Offset.Value); + viewBuilder.Append(view); + } + viewsBuffer = viewBuilder.Build(); + } + + int bufferCount = JsonFieldData.VariadicDataBuffers?.Count ?? 
0; + ArrowBuffer[] buffers = new ArrowBuffer[2 + bufferCount]; + buffers[0] = validityBuffer; + buffers[1] = viewsBuffer; + for (int i = 0; i < bufferCount; i++) + { + buffers[i + 2] = new ArrowBuffer(Convert.FromHexString(JsonFieldData.VariadicDataBuffers[i])).Clone(); + } + + ArrayData arrayData = new ArrayData(type, JsonFieldData.Count, nullCount, 0, buffers); + Array = new BinaryViewArray(arrayData); + } + public void Visit(FixedSizeBinaryType type) { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); @@ -704,6 +780,22 @@ public void Visit(ListType type) Array = new ListArray(arrayData); } + public void Visit(ListViewType type) + { + ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); + ArrowBuffer offsetBuffer = GetOffsetBuffer(); + ArrowBuffer sizeBuffer = GetSizeBuffer(); + + var data = JsonFieldData; + JsonFieldData = data.Children[0]; + type.ValueDataType.Accept(this); + JsonFieldData = data; + + ArrayData arrayData = new ArrayData(type, JsonFieldData.Count, nullCount, 0, + new[] { validityBuffer, offsetBuffer, sizeBuffer }, new[] { Array.Data }); + Array = new ListViewArray(arrayData); + } + public void Visit(FixedSizeListType type) { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); @@ -878,11 +970,18 @@ private void GenerateArray(Func valueOffsets = new ArrowBuffer.Builder(JsonFieldData.Offset.Length); - valueOffsets.AppendRange(JsonFieldData.Offset); + ArrowBuffer.Builder valueOffsets = new ArrowBuffer.Builder(JsonFieldData.Offset.Count); + valueOffsets.AppendRange(JsonFieldData.IntOffset); return valueOffsets.Build(default); } + private ArrowBuffer GetSizeBuffer() + { + ArrowBuffer.Builder valueSizes = new ArrowBuffer.Builder(JsonFieldData.Size.Count); + valueSizes.AppendRange(JsonFieldData.IntSize); + return valueSizes.Build(default); + } + private ArrowBuffer GetTypeIdBuffer() { ArrowBuffer.Builder typeIds = new ArrowBuffer.Builder(JsonFieldData.TypeId.Length); @@ -920,10 +1019,61 @@ public class JsonFieldData public string Name { get; set; } public int Count { get; set; } public bool[] Validity { get; set; } - public int[] Offset { get; set; } + public JsonArray Offset { get; set; } + + [JsonPropertyName("SIZE")] + public JsonArray Size { get; set; } public int[] TypeId { get; set; } public JsonElement Data { get; set; } public List Children { get; set; } + + [JsonPropertyName("VIEWS")] + public List Views { get; set; } + + [JsonPropertyName("VARIADIC_DATA_BUFFERS")] + public List VariadicDataBuffers { get; set; } + + [JsonIgnore] + public IEnumerable IntOffset + { + get { return Offset.Select(GetInt); } + } + + [JsonIgnore] + public IEnumerable IntSize + { + get { return Size.Select(GetInt); } + } + + static int GetInt(JsonNode node) + { + try + { + return node.GetValue(); + } + catch + { + return int.Parse(node.GetValue()); + } + } + } + + public class JsonView + { + [JsonPropertyName("SIZE")] + public int Size { get; set; } + + [JsonPropertyName("INLINED")] + public string Inlined { get; set; } + + [JsonPropertyName("PREFIX_HEX")] + public string PrefixHex { get; set; } + + [JsonPropertyName("BUFFER_INDEX")] + public int? BufferIndex { get; set; } + + [JsonPropertyName("OFFSET")] + public int? 
Offset { get; set; } } internal sealed class ValidityConverter : JsonConverter diff --git a/csharp/test/Apache.Arrow.IntegrationTest/Properties/launchSettings.json b/csharp/test/Apache.Arrow.IntegrationTest/Properties/launchSettings.json new file mode 100644 index 0000000000000..46bdeff290e17 --- /dev/null +++ b/csharp/test/Apache.Arrow.IntegrationTest/Properties/launchSettings.json @@ -0,0 +1,8 @@ +{ + "profiles": { + "Apache.Arrow.IntegrationTest": { + "commandName": "Project", + "commandLineArgs": "--mode validate -j C:\\Users\\curt\\AppData\\Local\\Temp\\arrow-integration-9_cov7dz\\generated_binary_view.json -a C:\\Users\\curt\\AppData\\Local\\Temp\\tmpxicbzqpn\\460a151e_generated_binary_view.json_as_file" + } + } +} \ No newline at end of file diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs index 137dc16d473a4..25ef289f0dc25 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs @@ -64,13 +64,16 @@ private static IEnumerable, IArrowArray>> GenerateTestDa FloatType.Default, DoubleType.Default, BinaryType.Default, + BinaryViewType.Default, StringType.Default, + StringViewType.Default, Date32Type.Default, Date64Type.Default, TimestampType.Default, new Decimal128Type(14, 10), new Decimal256Type(14,10), new ListType(Int64Type.Default), + new ListViewType(Int64Type.Default), new StructType(new List{ new Field.Builder().Name("Strings").DataType(StringType.Default).Nullable(true).Build(), new Field.Builder().Name("Ints").DataType(Int32Type.Default).Nullable(true).Build() @@ -122,7 +125,9 @@ private class TestDataGenerator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -131,6 +136,7 @@ private class TestDataGenerator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -368,6 +374,34 @@ public void Visit(BinaryType type) ExpectedArray = resultBuilder.Build(); } + public void Visit(BinaryViewType type) + { + BinaryViewArray.Builder resultBuilder = new BinaryViewArray.Builder().Reserve(_baseDataTotalElementCount); + + for (int i = 0; i < _baseDataListCount; i++) + { + List dataList = _baseData[i]; + BinaryViewArray.Builder builder = new BinaryViewArray.Builder().Reserve(dataList.Count); + + foreach (byte? value in dataList) + { + if (value.HasValue) + { + builder.Append(value.Value); + resultBuilder.Append(value.Value); + } + else + { + builder.AppendNull(); + resultBuilder.AppendNull(); + } + } + TestTargetArrayList.Add(builder.Build()); + } + + ExpectedArray = resultBuilder.Build(); + } + public void Visit(StringType type) { StringArray.Builder resultBuilder = new StringArray.Builder().Reserve(_baseDataTotalElementCount); @@ -388,6 +422,26 @@ public void Visit(StringType type) ExpectedArray = resultBuilder.Build(); } + public void Visit(StringViewType type) + { + StringViewArray.Builder resultBuilder = new StringViewArray.Builder().Reserve(_baseDataTotalElementCount); + + for (int i = 0; i < _baseDataListCount; i++) + { + List dataList = _baseData[i]; + StringViewArray.Builder builder = new StringViewArray.Builder().Reserve(dataList.Count); + + foreach (string value in dataList.Select(_ => _.ToString() ?? 
null)) + { + builder.Append(value); + resultBuilder.Append(value); + } + TestTargetArrayList.Add(builder.Build()); + } + + ExpectedArray = resultBuilder.Build(); + } + public void Visit(ListType type) { ListArray.Builder resultBuilder = new ListArray.Builder(type.ValueDataType).Reserve(_baseDataTotalElementCount); @@ -423,6 +477,41 @@ public void Visit(ListType type) ExpectedArray = resultBuilder.Build(); } + public void Visit(ListViewType type) + { + ListViewArray.Builder resultBuilder = new ListViewArray.Builder(type.ValueDataType).Reserve(_baseDataTotalElementCount); + Int64Array.Builder resultValueBuilder = (Int64Array.Builder)resultBuilder.ValueBuilder.Reserve(_baseDataTotalElementCount); + + for (int i = 0; i < _baseDataListCount; i++) + { + List dataList = _baseData[i]; + + ListViewArray.Builder builder = new ListViewArray.Builder(type.ValueField).Reserve(dataList.Count); + Int64Array.Builder valueBuilder = (Int64Array.Builder)builder.ValueBuilder.Reserve(dataList.Count); + + foreach (long? value in dataList) + { + if (value.HasValue) + { + builder.Append(); + resultBuilder.Append(); + + valueBuilder.Append(value.Value); + resultValueBuilder.Append(value.Value); + } + else + { + builder.AppendNull(); + resultBuilder.AppendNull(); + } + } + + TestTargetArrayList.Add(builder.Build()); + } + + ExpectedArray = resultBuilder.Build(); + } + public void Visit(FixedSizeListType type) { FixedSizeListArray.Builder resultBuilder = new FixedSizeListArray.Builder(type.ValueDataType, type.ListSize).Reserve(_baseDataTotalElementCount); diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs index 2aaffe7835258..10315ff287c0b 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -20,6 +20,7 @@ using System.Threading.Tasks; using Apache.Arrow.Arrays; using Xunit; +using System.Diagnostics; namespace Apache.Arrow.Tests { @@ -90,10 +91,13 @@ private class ArrayComparer : IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, @@ -136,12 +140,15 @@ public ArrayComparer(IArrowArray expectedArray, bool strictCompare) public void Visit(DayTimeIntervalArray array) => CompareArrays(array); public void Visit(MonthDayNanosecondIntervalArray array) => CompareArrays(array); public void Visit(ListArray array) => CompareArrays(array); + public void Visit(ListViewArray array) => CompareArrays(array); public void Visit(FixedSizeListArray array) => CompareArrays(array); public void Visit(FixedSizeBinaryArray array) => CompareArrays(array); public void Visit(Decimal128Array array) => CompareArrays(array); public void Visit(Decimal256Array array) => CompareArrays(array); public void Visit(StringArray array) => CompareBinaryArrays(array); + public void Visit(StringViewArray array) => CompareVariadicArrays(array); public void Visit(BinaryArray array) => CompareBinaryArrays(array); + public void Visit(BinaryViewArray array) => CompareVariadicArrays(array); public void Visit(StructArray array) { @@ -230,6 +237,32 @@ private void CompareBinaryArrays(BinaryArray actualArray) } } + private void CompareVariadicArrays(BinaryViewArray actualArray) + where T : IArrowArray + { + Assert.IsAssignableFrom(_expectedArray); + Assert.IsAssignableFrom(actualArray); + + var expectedArray = 
(BinaryViewArray)_expectedArray; + + actualArray.Data.DataType.Accept(_arrayTypeComparer); + + Assert.Equal(expectedArray.Length, actualArray.Length); + Assert.Equal(expectedArray.NullCount, actualArray.NullCount); + Assert.Equal(expectedArray.Offset, actualArray.Offset); + + CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer); + + Assert.True(expectedArray.Views.SequenceEqual(actualArray.Views)); + + for (int i = 0; i < expectedArray.Length; i++) + { + Assert.True( + expectedArray.GetBytes(i).SequenceEqual(actualArray.GetBytes(i)), + $"BinaryArray values do not match at index {i}."); + } + } + private void CompareArrays(FixedSizeBinaryArray actualArray) { Assert.IsAssignableFrom(_expectedArray); @@ -346,6 +379,34 @@ private void CompareArrays(ListArray actualArray) actualArray.Values.Accept(new ArrayComparer(expectedArray.Values, _strictCompare)); } + private void CompareArrays(ListViewArray actualArray) + { + Assert.IsAssignableFrom(_expectedArray); + ListViewArray expectedArray = (ListViewArray)_expectedArray; + + actualArray.Data.DataType.Accept(_arrayTypeComparer); + + Assert.Equal(expectedArray.Length, actualArray.Length); + Assert.Equal(expectedArray.NullCount, actualArray.NullCount); + Assert.Equal(expectedArray.Offset, actualArray.Offset); + + CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer); + + if (_strictCompare) + { + Assert.True(expectedArray.ValueOffsetsBuffer.Span.SequenceEqual(actualArray.ValueOffsetsBuffer.Span)); + Assert.True(expectedArray.SizesBuffer.Span.SequenceEqual(actualArray.SizesBuffer.Span)); + } + else + { + int length = expectedArray.Length * sizeof(int); + Assert.True(expectedArray.ValueOffsetsBuffer.Span.Slice(0, length).SequenceEqual(actualArray.ValueOffsetsBuffer.Span.Slice(0, length))); + Assert.True(expectedArray.SizesBuffer.Span.Slice(0, length).SequenceEqual(actualArray.SizesBuffer.Span.Slice(0, length))); + } + + actualArray.Values.Accept(new ArrayComparer(expectedArray.Values, _strictCompare)); + } + private void CompareArrays(FixedSizeListArray actualArray) { Assert.IsAssignableFrom(_expectedArray); diff --git a/csharp/test/Apache.Arrow.Tests/BinaryViewTests.cs b/csharp/test/Apache.Arrow.Tests/BinaryViewTests.cs new file mode 100644 index 0000000000000..eb617b4dedc75 --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/BinaryViewTests.cs @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
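The BinaryViewTests below exercise the `Apache.Arrow.Scalars.BinaryView` struct added earlier in this patch. As a quick usage sketch of its two constructors (the buffer index and offset are invented here; in a real array they are assigned when a value does not fit inline):

```csharp
using System;
using System.Text;
using Apache.Arrow.Scalars;

static class BinaryViewScalarSketch
{
    static void Main()
    {
        // Values of at most 12 bytes live entirely inside the 16-byte view.
        var inline = new BinaryView(Encoding.UTF8.GetBytes("hello"));
        Console.WriteLine(inline.IsInline);     // True
        Console.WriteLine(inline.BufferIndex);  // -1: no data buffer is referenced

        // Longer values keep a 4-byte prefix inline and point into one of the
        // variadic data buffers; index 0 and offset 128 are made-up example values.
        byte[] prefix = Encoding.UTF8.GetBytes("long");  // exactly 4 bytes
        var reference = new BinaryView(42, prefix, bufferIndex: 0, bufferOffset: 128);
        Console.WriteLine(reference.IsInline);  // False
        Console.WriteLine(reference.Length);    // 42
    }
}
```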
+ +using System; +using Apache.Arrow.Scalars; +using Xunit; + +namespace Apache.Arrow.Tests +{ + public class BinaryViewTests + { + private static readonly byte[] empty = new byte[0]; + private static readonly byte[] oneByte = new byte[1]; + private static readonly byte[] fourBytes = new byte[] { 1, 2, 3, 4 }; + private static readonly byte[] fiveBytes = new byte[] { 5, 4, 3, 2, 1 }; + private static readonly byte[] twelveBytes = new byte[] { 1, 2, 3, 4, 8, 7, 6, 5, 9, 10, 11, 12 }; + private static readonly byte[] thirteenBytes = new byte[13]; + + [Fact] + public void Equality() + { + BinaryView one = new BinaryView(oneByte); + BinaryView four = new BinaryView(fourBytes); + BinaryView twelve = new BinaryView(twelveBytes); + BinaryView twelvePlus = new BinaryView(13, fourBytes, 0, 0); + Assert.Equal(one, one); + Assert.NotEqual(one, four); + Assert.NotEqual(four, twelve); + Assert.NotEqual(four, twelvePlus); + } + + [Fact] + public void ConstructorThrows() + { + Assert.Throws(() => new BinaryView(thirteenBytes)); + Assert.Throws(() => new BinaryView(20, empty, 0, 0)); + Assert.Throws(() => new BinaryView(20, fiveBytes, 0, 0)); + Assert.Throws(() => new BinaryView(13, thirteenBytes, 0, 0)); + Assert.Throws(() => new BinaryView(4, fourBytes, 0, 0)); + } + + [Fact] + public void ConstructInline() + { + BinaryView zero = new BinaryView(empty); + Assert.Equal(-1, zero.BufferIndex); + Assert.Equal(-1, zero.BufferOffset); + Assert.Equal(0, zero.Length); + Assert.Equal(0, zero.Bytes.Length); + + BinaryView one = new BinaryView(oneByte); + Assert.Equal(-1, one.BufferIndex); + Assert.Equal(-1, one.BufferOffset); + Assert.Equal(1, one.Length); + Assert.Equal(1, one.Bytes.Length); + Assert.Equal((byte)0, one.Bytes[0]); + + BinaryView twelve = new BinaryView(twelveBytes); + Assert.Equal(-1, one.BufferIndex); + Assert.Equal(-1, one.BufferOffset); + Assert.Equal(12, twelve.Length); + Assert.Equal(12, twelve.Bytes.Length); + Assert.Equal((byte)8, twelve.Bytes[4]); + } + + [Fact] + public void ConstructPrefix() + { + BinaryView four = new BinaryView(14, fourBytes, 2, 3); + Assert.Equal(2, four.BufferIndex); + Assert.Equal(3, four.BufferOffset); + Assert.Equal(14, four.Length); + Assert.Equal(4, four.Bytes.Length); + Assert.Equal((byte)2, four.Bytes[1]); + } + } +} diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs index 83902d8d93c70..274434e4bab09 100644 --- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs @@ -741,7 +741,9 @@ public unsafe void ExportBatch() [SkippableFact] public unsafe void RoundTripTestBatch() { - RecordBatch batch1 = TestData.CreateSampleRecordBatch(4, createDictionaryArray: true); + // TODO: Enable these once this the version of pyarrow referenced during testing supports them + HashSet unsupported = new HashSet { ArrowTypeId.ListView, ArrowTypeId.BinaryView, ArrowTypeId.StringView }; + RecordBatch batch1 = TestData.CreateSampleRecordBatch(4, excludedTypes: unsupported); RecordBatch batch2 = batch1.Clone(); CArrowArray* cExportArray = CArrowArray.Create(); diff --git a/csharp/test/Apache.Arrow.Tests/TableTests.cs b/csharp/test/Apache.Arrow.Tests/TableTests.cs index d52b514e092d9..83c88265d172b 100644 --- a/csharp/test/Apache.Arrow.Tests/TableTests.cs +++ b/csharp/test/Apache.Arrow.Tests/TableTests.cs @@ -62,7 +62,11 @@ public void TestTableFromRecordBatches() Table table1 = Table.TableFromRecordBatches(recordBatch1.Schema, 
recordBatches); Assert.Equal(20, table1.RowCount); - Assert.Equal(30, table1.ColumnCount); +#if NET5_0_OR_GREATER + Assert.Equal(35, table1.ColumnCount); +#else + Assert.Equal(34, table1.ColumnCount); +#endif Assert.Equal("ChunkedArray: Length=20, DataType=list", table1.Column(0).Data.ToString()); FixedSizeBinaryType type = new FixedSizeBinaryType(17); diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs b/csharp/test/Apache.Arrow.Tests/TestData.cs index b43321abd7499..29ddef2864862 100644 --- a/csharp/test/Apache.Arrow.Tests/TestData.cs +++ b/csharp/test/Apache.Arrow.Tests/TestData.cs @@ -24,53 +24,66 @@ namespace Apache.Arrow.Tests { public static class TestData { - public static RecordBatch CreateSampleRecordBatch(int length, bool createDictionaryArray = true) + public static RecordBatch CreateSampleRecordBatch(int length, bool createDictionaryArray) { - return CreateSampleRecordBatch(length, columnSetCount: 1, createDictionaryArray); + HashSet excluded = createDictionaryArray ? null : new HashSet { ArrowTypeId.Dictionary }; + return CreateSampleRecordBatch(length, columnSetCount: 1, excluded); } - public static RecordBatch CreateSampleRecordBatch(int length, int columnSetCount, bool createAdvancedTypeArrays) + public static RecordBatch CreateSampleRecordBatch( + int length, + int columnSetCount = 1, + HashSet excludedTypes = null) { Schema.Builder builder = new Schema.Builder(); - for (int i = 0; i < columnSetCount; i++) + + void AddField(Field field) { - builder.Field(CreateField(new ListType(Int64Type.Default), i)); - builder.Field(CreateField(BooleanType.Default, i)); - builder.Field(CreateField(UInt8Type.Default, i)); - builder.Field(CreateField(Int8Type.Default, i)); - builder.Field(CreateField(UInt16Type.Default, i)); - builder.Field(CreateField(Int16Type.Default, i)); - builder.Field(CreateField(UInt32Type.Default, i)); - builder.Field(CreateField(Int32Type.Default, i)); - builder.Field(CreateField(UInt64Type.Default, i)); - builder.Field(CreateField(Int64Type.Default, i)); - builder.Field(CreateField(FloatType.Default, i)); - builder.Field(CreateField(DoubleType.Default, i)); - builder.Field(CreateField(Date32Type.Default, i)); - builder.Field(CreateField(Date64Type.Default, i)); - builder.Field(CreateField(Time32Type.Default, i)); - builder.Field(CreateField(Time64Type.Default, i)); - builder.Field(CreateField(TimestampType.Default, i)); - builder.Field(CreateField(StringType.Default, i)); - builder.Field(CreateField(new StructType(new List { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }), i)); - builder.Field(CreateField(new Decimal128Type(10, 6), i)); - builder.Field(CreateField(new Decimal256Type(16, 8), i)); - builder.Field(CreateField(new MapType(StringType.Default, Int32Type.Default), i)); - builder.Field(CreateField(IntervalType.YearMonth, i)); - builder.Field(CreateField(IntervalType.DayTime, i)); - builder.Field(CreateField(IntervalType.MonthDayNanosecond, i)); - - if (createAdvancedTypeArrays) + if (excludedTypes == null || !excludedTypes.Contains(field.DataType.TypeId)) { - builder.Field(CreateField(new DictionaryType(Int32Type.Default, StringType.Default, false), i)); - builder.Field(CreateField(new FixedSizeBinaryType(16), i)); - builder.Field(CreateField(new FixedSizeListType(Int32Type.Default, 3), i)); - builder.Field(CreateField(new UnionType(new[] { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[] { 0, 1 }, UnionMode.Sparse), i)); - builder.Field(CreateField(new UnionType(new[] { 
CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[] { 0, 1 }, UnionMode.Dense), -i)); + builder.Field(field); } + } - //builder.Field(CreateField(HalfFloatType.Default)); - //builder.Field(CreateField(StringType.Default)); + for (int i = 0; i < columnSetCount; i++) + { + AddField(CreateField(new ListType(Int64Type.Default), i)); + AddField(CreateField(new ListViewType(Int64Type.Default), i)); + AddField(CreateField(BooleanType.Default, i)); + AddField(CreateField(UInt8Type.Default, i)); + AddField(CreateField(Int8Type.Default, i)); + AddField(CreateField(UInt16Type.Default, i)); + AddField(CreateField(Int16Type.Default, i)); + AddField(CreateField(UInt32Type.Default, i)); + AddField(CreateField(Int32Type.Default, i)); + AddField(CreateField(UInt64Type.Default, i)); + AddField(CreateField(Int64Type.Default, i)); +#if NET5_0_OR_GREATER + AddField(CreateField(HalfFloatType.Default, i)); +#endif + AddField(CreateField(FloatType.Default, i)); + AddField(CreateField(DoubleType.Default, i)); + AddField(CreateField(Date32Type.Default, i)); + AddField(CreateField(Date64Type.Default, i)); + AddField(CreateField(Time32Type.Default, i)); + AddField(CreateField(Time64Type.Default, i)); + AddField(CreateField(TimestampType.Default, i)); + AddField(CreateField(StringType.Default, i)); + AddField(CreateField(StringViewType.Default, i)); + AddField(CreateField(new StructType(new List { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }), i)); + AddField(CreateField(new Decimal128Type(10, 6), i)); + AddField(CreateField(new Decimal256Type(16, 8), i)); + AddField(CreateField(new MapType(StringType.Default, Int32Type.Default), i)); + AddField(CreateField(IntervalType.YearMonth, i)); + AddField(CreateField(IntervalType.DayTime, i)); + AddField(CreateField(IntervalType.MonthDayNanosecond, i)); + AddField(CreateField(BinaryType.Default, i)); + AddField(CreateField(BinaryViewType.Default, i)); + AddField(CreateField(new FixedSizeBinaryType(16), i)); + AddField(CreateField(new FixedSizeListType(Int32Type.Default, 3), i)); + AddField(CreateField(new UnionType(new[] { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[] { 0, 1 }, UnionMode.Sparse), i)); + AddField(CreateField(new UnionType(new[] { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[] { 0, 1 }, UnionMode.Dense), -i)); + AddField(CreateField(new DictionaryType(Int32Type.Default, StringType.Default, false), i)); } Schema schema = builder.Build(); @@ -130,16 +143,23 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, +#if NET5_0_OR_GREATER + IArrowTypeVisitor, +#endif IArrowTypeVisitor { private int Length { get; } @@ -160,6 +180,9 @@ public ArrayCreator(int length) public void Visit(UInt32Type type) => GenerateArray(new UInt32Array.Builder(), x => (uint)x); public void Visit(UInt64Type type) => GenerateArray(new UInt64Array.Builder(), x => (ulong)x); public void Visit(FloatType type) => GenerateArray(new FloatArray.Builder(), x => ((float)x / Length)); +#if NET5_0_OR_GREATER + public void Visit(HalfFloatType type) => GenerateArray(new HalfFloatArray.Builder(), x => ((Half)x / (Half)Length)); +#endif public void Visit(DoubleType type) => 
GenerateArray(new DoubleArray.Builder(), x => ((double)x / Length)); public void Visit(Decimal128Type type) { @@ -277,6 +300,30 @@ public void Visit(StringType type) Array = builder.Build(); } + public void Visit(StringViewType type) + { + var str = "length=ten"; + var builder = new StringViewArray.Builder(); + + for (var i = 0; i < Length; i++) + { + switch (i % 3) + { + case 0: + builder.AppendNull(); + break; + case 1: + builder.Append(str); + break; + case 2: + builder.Append(str + str); + break; + } + } + + Array = builder.Build(); + } + public void Visit(ListType type) { var builder = new ListArray.Builder(type.ValueField).Reserve(Length); @@ -294,6 +341,23 @@ public void Visit(ListType type) Array = builder.Build(); } + public void Visit(ListViewType type) + { + var builder = new ListViewArray.Builder(type.ValueField).Reserve(Length); + + var valueBuilder = (Int64Array.Builder)builder.ValueBuilder.Reserve(Length + 1); + + for (var i = 0; i < Length; i++) + { + builder.Append(); + valueBuilder.Append(i); + } + //Add a value to check if Values.Length can exceed ListArray.Length + valueBuilder.Append(0); + + Array = builder.Build(); + } + public void Visit(FixedSizeListType type) { var builder = new FixedSizeListArray.Builder(type.ValueField, type.ListSize).Reserve(Length); @@ -411,6 +475,64 @@ public void Visit(DictionaryType type) Array = new DictionaryArray(type, indicesBuilder.Build(), valueBuilder.Build()); } + public void Visit(BinaryType type) + { + ReadOnlySpan shortData = new[] { (byte)0, (byte)1, (byte)2, (byte)3, (byte)4, (byte)5, (byte)6, (byte)7, (byte)8, (byte)9 }; + ReadOnlySpan longData = new[] + { + (byte)0, (byte)1, (byte)2, (byte)3, (byte)4, (byte)5, (byte)6, (byte)7, (byte)8, (byte)9, + (byte)10, (byte)11, (byte)12, (byte)13, (byte)14, (byte)15, (byte)16, (byte)17, (byte)18, (byte)19 + }; + var builder = new BinaryArray.Builder(); + + for (var i = 0; i < Length; i++) + { + switch (i % 3) + { + case 0: + builder.AppendNull(); + break; + case 1: + builder.Append(shortData); + break; + case 2: + builder.Append(longData); + break; + } + } + + Array = builder.Build(); + } + + public void Visit(BinaryViewType type) + { + ReadOnlySpan shortData = new[] { (byte)0, (byte)1, (byte)2, (byte)3, (byte)4, (byte)5, (byte)6, (byte)7, (byte)8, (byte)9 }; + ReadOnlySpan longData = new[] + { + (byte)0, (byte)1, (byte)2, (byte)3, (byte)4, (byte)5, (byte)6, (byte)7, (byte)8, (byte)9, + (byte)10, (byte)11, (byte)12, (byte)13, (byte)14, (byte)15, (byte)16, (byte)17, (byte)18, (byte)19 + }; + var builder = new BinaryViewArray.Builder(); + + for (var i = 0; i < Length; i++) + { + switch (i % 3) + { + case 0: + builder.AppendNull(); + break; + case 1: + builder.Append(shortData); + break; + case 2: + builder.Append(longData); + break; + } + } + + Array = builder.Build(); + } + public void Visit(FixedSizeBinaryType type) { ArrowBuffer.Builder valueBuilder = new ArrowBuffer.Builder(); diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 2bbc843836af9..230ec5b3effff 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1932,13 +1932,12 @@ def _temp_path(): .skip_tester('Rust'), generate_binary_view_case() - .skip_tester('C#') .skip_tester('Java') .skip_tester('JS') .skip_tester('Rust'), generate_list_view_case() - .skip_tester('C#') + .skip_tester('C#') # Doesn't support large list views .skip_tester('Java') .skip_tester('JS') .skip_tester('Rust'), diff --git a/docs/source/status.rst 
b/docs/source/status.rst index e860aceb76e15..03a87012342c2 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -68,9 +68,13 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Large Utf8 | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| Binary View | ✓ | | ✓ | | | | | | +| Binary View | ✓ | | ✓ | | ✓ | | | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| String View | ✓ | | ✓ | | | | | | +| Large Binary View | ✓ | | ✓ | | | | | | ++-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ +| Utf8 View | ✓ | | ✓ | | ✓ | | | | ++-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ +| Large Utf8 View | ✓ | | ✓ | | | | | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ @@ -83,7 +87,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Large List | ✓ | ✓ | ✓ | | | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| List View | ✓ | | ✓ | | | | | | +| List View | ✓ | | ✓ | | ✓ | | | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Large List View | ✓ | | ✓ | | | | | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ From bcaeaa8c2d970b81249cfba019475598e3d3109f Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Wed, 27 Dec 2023 11:30:16 -0800 Subject: [PATCH 109/570] MINOR: [C#] Remove launchSettings.json (#39382) ### Rationale for this change A previous commit accidentally included a version of launchSettings.json used for local debugging. This file is not helpful to anyone. ### Are these changes tested? N/A ### Are there any user-facing changes? No. Authored-by: Curt Hagenlocher Signed-off-by: Curt Hagenlocher --- .../Properties/launchSettings.json | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 csharp/test/Apache.Arrow.IntegrationTest/Properties/launchSettings.json diff --git a/csharp/test/Apache.Arrow.IntegrationTest/Properties/launchSettings.json b/csharp/test/Apache.Arrow.IntegrationTest/Properties/launchSettings.json deleted file mode 100644 index 46bdeff290e17..0000000000000 --- a/csharp/test/Apache.Arrow.IntegrationTest/Properties/launchSettings.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "profiles": { - "Apache.Arrow.IntegrationTest": { - "commandName": "Project", - "commandLineArgs": "--mode validate -j C:\\Users\\curt\\AppData\\Local\\Temp\\arrow-integration-9_cov7dz\\generated_binary_view.json -a C:\\Users\\curt\\AppData\\Local\\Temp\\tmpxicbzqpn\\460a151e_generated_binary_view.json_as_file" - } - } -} \ No newline at end of file From 7c3480e2f028f5881242f227f42155cf833efee7 Mon Sep 17 00:00:00 2001 From: mwish Date: Fri, 29 Dec 2023 10:58:12 +0800 Subject: [PATCH 110/570] GH-39326: [C++] Flaky DatasetWriterTestFixture.MaxRowsOneWriteBackpresure test (#39379) ### Rationale for this change This patch reduce the number of open files in testing first. I've verify the test in 14.0.2, it hangs forever. ### What changes are included in this PR? Change the test file number from 100 to 20 ### Are these changes tested? Already ### Are there any user-facing changes? 
no * Closes: #39326 Authored-by: mwish Signed-off-by: mwish --- cpp/src/arrow/dataset/dataset_writer_test.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/dataset/dataset_writer_test.cc b/cpp/src/arrow/dataset/dataset_writer_test.cc index e62e779f71797..1ac0ec3f39e97 100644 --- a/cpp/src/arrow/dataset/dataset_writer_test.cc +++ b/cpp/src/arrow/dataset/dataset_writer_test.cc @@ -290,12 +290,12 @@ TEST_F(DatasetWriterTestFixture, MaxRowsOneWriteBackpresure) { write_options_.max_open_files = 2; write_options_.min_rows_per_group = kFileSizeLimit - 1; auto dataset_writer = MakeDatasetWriter(/*max_rows=*/kFileSizeLimit); - for (int i = 0; i < 20; ++i) { - dataset_writer->WriteRecordBatch(MakeBatch(kFileSizeLimit * 5), ""); + for (int i = 0; i < 5; ++i) { + dataset_writer->WriteRecordBatch(MakeBatch(kFileSizeLimit * 2), ""); } EndWriterChecked(dataset_writer.get()); std::vector expected_files; - for (int i = 0; i < 100; ++i) { + for (int i = 0; i < 10; ++i) { expected_files.emplace_back("testdir/chunk-" + std::to_string(i) + ".arrow", kFileSizeLimit * i, kFileSizeLimit); } From 8a9f877896644ef1629136e8428a2c21bce64ae3 Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Mon, 1 Jan 2024 22:35:58 +0900 Subject: [PATCH 111/570] GH-39051: [C++] Use Cast() instead of CastTo() for List Scalar in test (#39353) ### Rationale for this change Remove legacy code ### What changes are included in this PR? Replace the legacy scalar CastTo implementation for List Scalar in test. ### Are these changes tested? Yes. It is passed by existing test cases. ### Are there any user-facing changes? No. * Closes: #39051 Authored-by: Hyunseok Seo Signed-off-by: Sutou Kouhei --- .../compute/kernels/scalar_cast_nested.cc | 10 ++++- cpp/src/arrow/scalar_test.cc | 39 ++++++++++++------- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index 6fd449a931381..ec5291ef608a3 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -401,7 +401,7 @@ void AddTypeToTypeCast(CastFunction* func) { kernel.exec = CastFunctor::Exec; kernel.signature = KernelSignature::Make({InputType(SrcT::type_id)}, kOutputTargetType); kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE; - DCHECK_OK(func->AddKernel(StructType::type_id, std::move(kernel))); + DCHECK_OK(func->AddKernel(SrcT::type_id, std::move(kernel))); } template @@ -480,14 +480,18 @@ std::vector> GetNestedCasts() { auto cast_list = std::make_shared("cast_list", Type::LIST); AddCommonCasts(Type::LIST, kOutputTargetType, cast_list.get()); AddListCast(cast_list.get()); + AddListCast(cast_list.get()); AddListCast(cast_list.get()); + AddListCast(cast_list.get()); AddTypeToTypeCast, FixedSizeListType>(cast_list.get()); auto cast_large_list = std::make_shared("cast_large_list", Type::LARGE_LIST); AddCommonCasts(Type::LARGE_LIST, kOutputTargetType, cast_large_list.get()); AddListCast(cast_large_list.get()); + AddListCast(cast_large_list.get()); AddListCast(cast_large_list.get()); + AddListCast(cast_large_list.get()); AddTypeToTypeCast, FixedSizeListType>( cast_large_list.get()); @@ -503,7 +507,11 @@ std::vector> GetNestedCasts() { AddCommonCasts(Type::FIXED_SIZE_LIST, kOutputTargetType, cast_fsl.get()); AddTypeToTypeCast(cast_fsl.get()); AddTypeToTypeCast, ListType>(cast_fsl.get()); + AddTypeToTypeCast, ListViewType>(cast_fsl.get()); AddTypeToTypeCast, 
LargeListType>(cast_fsl.get()); + AddTypeToTypeCast, LargeListViewType>( + cast_fsl.get()); + AddTypeToTypeCast, MapType>(cast_fsl.get()); // So is struct auto cast_struct = std::make_shared("cast_struct", Type::STRUCT); diff --git a/cpp/src/arrow/scalar_test.cc b/cpp/src/arrow/scalar_test.cc index ac740f92c8527..e8b8784e7a314 100644 --- a/cpp/src/arrow/scalar_test.cc +++ b/cpp/src/arrow/scalar_test.cc @@ -1077,7 +1077,8 @@ std::shared_ptr MakeListType( template void CheckListCast(const ScalarType& scalar, const std::shared_ptr& to_type) { - EXPECT_OK_AND_ASSIGN(auto cast_scalar, scalar.CastTo(to_type)); + EXPECT_OK_AND_ASSIGN(auto cast_scalar_datum, Cast(scalar, to_type)); + const auto& cast_scalar = cast_scalar_datum.scalar(); ASSERT_OK(cast_scalar->ValidateFull()); ASSERT_EQ(*cast_scalar->type, *to_type); @@ -1087,11 +1088,25 @@ void CheckListCast(const ScalarType& scalar, const std::shared_ptr& to *checked_cast(*cast_scalar).value); } -void CheckInvalidListCast(const Scalar& scalar, const std::shared_ptr& to_type, - const std::string& expected_message) { - EXPECT_RAISES_WITH_CODE_AND_MESSAGE_THAT(StatusCode::Invalid, - ::testing::HasSubstr(expected_message), - scalar.CastTo(to_type)); +template +void CheckListCastError(const ScalarType& scalar, + const std::shared_ptr& to_type) { + StatusCode code; + std::string expected_message; + if (scalar.type->id() == Type::FIXED_SIZE_LIST) { + code = StatusCode::TypeError; + expected_message = + "Size of FixedSizeList is not the same. input list: " + scalar.type->ToString() + + " output list: " + to_type->ToString(); + } else { + code = StatusCode::Invalid; + expected_message = + "ListType can only be casted to FixedSizeListType if the lists are all the " + "expected size."; + } + + EXPECT_RAISES_WITH_CODE_AND_MESSAGE_THAT(code, ::testing::HasSubstr(expected_message), + Cast(scalar, to_type)); } template @@ -1178,10 +1193,8 @@ class TestListLikeScalar : public ::testing::Test { CheckListCast( scalar, fixed_size_list(value_->type(), static_cast(value_->length()))); - CheckInvalidListCast(scalar, fixed_size_list(value_->type(), 5), - "Cannot cast " + scalar.type->ToString() + " of length " + - std::to_string(value_->length()) + - " to fixed size list of length 5"); + auto invalid_cast_type = fixed_size_list(value_->type(), 5); + CheckListCastError(scalar, invalid_cast_type); } protected: @@ -1238,10 +1251,8 @@ TEST(TestMapScalar, Cast) { CheckListCast(scalar, large_list(key_value_type)); CheckListCast(scalar, fixed_size_list(key_value_type, 2)); - CheckInvalidListCast(scalar, fixed_size_list(key_value_type, 5), - "Cannot cast " + scalar.type->ToString() + " of length " + - std::to_string(value->length()) + - " to fixed size list of length 5"); + auto invalid_cast_type = fixed_size_list(key_value_type, 5); + CheckListCastError(scalar, invalid_cast_type); } TEST(TestStructScalar, FieldAccess) { From 13696304089217c7c1c9b84c497318f506eee67b Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 1 Jan 2024 22:36:37 +0900 Subject: [PATCH 112/570] GH-39359: [CI][C++] Remove MinGW MINGW32 C++ job (#39376) ### Rationale for this change MSYS2 stopped providing MINGW32 packages: * https://github.com/msys2/MINGW-packages/pull/19517 * https://github.com/msys2/MINGW-packages/commit/f68162d5827fce41e7c2d4eb65cab6fcd8b9dd60 ### What changes are included in this PR? Remove the job. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. 
* Closes: #39359 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- .github/workflows/cpp.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 2e3c2a355a884..3d4fb10b10c39 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -340,8 +340,6 @@ jobs: fail-fast: false matrix: include: - - msystem_lower: mingw32 - msystem_upper: MINGW32 - msystem_lower: mingw64 msystem_upper: MINGW64 - msystem_lower: clang64 From 4543f5d8394e221681c362f4e7c8a7268823b2cd Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 1 Jan 2024 22:38:24 +0900 Subject: [PATCH 113/570] GH-39268: [C++] Don't install bundled Azure SDK for C++ with CMake 3.28+ (#39269) ### Rationale for this change We can implement this by specifying `EXCLUDE_FROM_ALL TRUE` to `fetchcontent_declare()`. ### What changes are included in this PR? Specify `EXCLUDE_FROM_ALL TRUE` only with CMake 3.28+. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. * Closes: #39268 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 89d046945e5fe..3f327ed64ff00 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1015,6 +1015,10 @@ else() endif() include(FetchContent) +set(FC_DECLARE_COMMON_OPTIONS) +if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.28) + list(APPEND FC_DECLARE_COMMON_OPTIONS EXCLUDE_FROM_ALL TRUE) +endif() macro(prepare_fetchcontent) set(BUILD_SHARED_LIBS OFF) @@ -2146,6 +2150,9 @@ function(build_gtest) message(STATUS "Building gtest from source") set(GTEST_VENDORED TRUE) fetchcontent_declare(googletest + # We should not specify "EXCLUDE_FROM_ALL TRUE" here. + # Because we install GTest with custom path. + # ${FC_DECLARE_COMMON_OPTIONS} URL ${GTEST_SOURCE_URL} URL_HASH "SHA256=${ARROW_GTEST_BUILD_SHA256_CHECKSUM}") prepare_fetchcontent() @@ -5096,8 +5103,7 @@ function(build_azure_sdk) endif() message(STATUS "Building Azure SDK for C++ from source") fetchcontent_declare(azure_sdk - # EXCLUDE_FROM_ALL is available since CMake 3.28 - # EXCLUDE_FROM_ALL TRUE + ${FC_DECLARE_COMMON_OPTIONS} URL ${ARROW_AZURE_SDK_URL} URL_HASH "SHA256=${ARROW_AZURE_SDK_BUILD_SHA256_CHECKSUM}") prepare_fetchcontent() From 3087c941699ea8485de619b8a36d98322fe20aa0 Mon Sep 17 00:00:00 2001 From: shibei Date: Tue, 2 Jan 2024 09:23:56 +0800 Subject: [PATCH 114/570] GH-39387: [C++] Fix compile warning (#39389) ### Rationale for this change Fix compile warning: ```bash In file included from /workspace/arrow/cpp/src/arrow/array/array_base.h:26: /workspace/arrow/cpp/src/arrow/array/data.h:452:19: warning: unused variable 'buffer_length' [-Wunused-variable] const int64_t buffer_length = buffers[i].size / static_cast(sizeof(T)); ^ /workspace/arrow/cpp/src/arrow/array/data.h:467:19: warning: unused variable 'buffer_length' [-Wunused-variable] const int64_t buffer_length = buffers[i].size / static_cast(sizeof(T)); ^ 2 warnings generated. ``` ### What changes are included in this PR? ### Are these changes tested? ### Are there any user-facing changes? 
* Closes: #39387 Authored-by: shibei Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/array/data.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index f29f164d19973..edd443adc43c4 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -451,6 +451,7 @@ struct ARROW_EXPORT ArraySpan { util::span GetSpan(int i, int64_t length) const { const int64_t buffer_length = buffers[i].size / static_cast(sizeof(T)); assert(i > 0 && length + offset <= buffer_length); + ARROW_UNUSED(buffer_length); return util::span(buffers[i].data_as() + this->offset, length); } @@ -466,6 +467,7 @@ struct ARROW_EXPORT ArraySpan { util::span GetSpan(int i, int64_t length) { const int64_t buffer_length = buffers[i].size / static_cast(sizeof(T)); assert(i > 0 && length + offset <= buffer_length); + ARROW_UNUSED(buffer_length); return util::span(buffers[i].mutable_data_as() + this->offset, length); } From 98f677af3c281680b95093ceeab084b3e57e180a Mon Sep 17 00:00:00 2001 From: Hattonuri <53221537+Hattonuri@users.noreply.github.com> Date: Tue, 2 Jan 2024 07:35:48 +0300 Subject: [PATCH 115/570] GH-39413: [C++][Parquet] Vectorize decode plain on FLBA (#39414) ### Rationale for this change ### What changes are included in this PR? FLBA Decode Plain is not vectorized. So this parsing can be implemented faster https://godbolt.org/z/xWeb93xjW ### Are these changes tested? Yes, on unittest ### Are there any user-facing changes? * Closes: #39413 Authored-by: Dmitry Stasenko Signed-off-by: mwish --- cpp/src/parquet/encoding.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 9ad1ee6efc12a..840efa12cc3c1 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -1080,9 +1080,7 @@ inline int DecodePlain(const uint8_t* data, int64_t data_size ParquetException::EofException(); } for (int i = 0; i < num_values; ++i) { - out[i].ptr = data; - data += type_length; - data_size -= type_length; + out[i].ptr = data + i * type_length; } return static_cast(bytes_to_decode); } From fc20cd002817d62158cfa4cf4e096f29c3fce5da Mon Sep 17 00:00:00 2001 From: mwish Date: Tue, 2 Jan 2024 16:07:09 +0800 Subject: [PATCH 116/570] MINOR: [Docs] update date in NOTICE.txt (#39418) ### Rationale for this change Update Date from 2019 to 2024 in `NOTICE.txt` ### What changes are included in this PR? Update Date from 2019 to 2024 in `NOTICE.txt` ### Are these changes tested? no ### Are there any user-facing changes? no Authored-by: mwish Signed-off-by: Sutou Kouhei --- NOTICE.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NOTICE.txt b/NOTICE.txt index a609791374c28..2089c6fb20358 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1,5 +1,5 @@ Apache Arrow -Copyright 2016-2019 The Apache Software Foundation +Copyright 2016-2024 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). From eef2f76ec0f80d3bad7f54c4690465eb3df011f3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Jan 2024 10:19:36 -0500 Subject: [PATCH 117/570] MINOR: Bump org.apache.avro:avro from 1.8.2 to 1.11.3 in /java/dataset (#39401) Bumps org.apache.avro:avro from 1.8.2 to 1.11.3. 
    Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/dataset/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml index b533a1733521b..7d6092743bf4d 100644 --- a/java/dataset/pom.xml +++ b/java/dataset/pom.xml @@ -27,7 +27,7 @@ ../../../cpp/release-build/ 2.5.0 1.11.0 - 1.8.2 + 1.11.3 From 6b32b6d5ad5c4a519111086277f231b654c96056 Mon Sep 17 00:00:00 2001 From: david dali susanibar arce Date: Tue, 2 Jan 2024 10:20:25 -0500 Subject: [PATCH 118/570] GH-39327: [Java] define assemble descriptor for new custom maven plugin project (#39331) ### Rationale for this change To closes https://github.com/apache/arrow/issues/39327 ### What changes are included in this PR? GitHub CI validation needs to [run](https://github.com/apache/arrow/blob/main/ci/scripts/java_full_build.sh#L52) `assembly:single` for that reason is needed to setup a descriptor ref. In the case of this maven plugin, I only propose to include "src" as part of the resources. ### Are these changes tested? Yes, by ```` mvn clean \ install \ assembly:single \ source:jar \ javadoc:jar \ -Papache-release \ -DdescriptorId=source-release ```` ### Are there any user-facing changes? No. * Closes: #39327 Lead-authored-by: david dali susanibar arce Co-authored-by: Sutou Kouhei Signed-off-by: David Li --- java/maven/pom.xml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/java/maven/pom.xml b/java/maven/pom.xml index 86ac402732bc4..0923984c8e5e5 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -281,6 +281,27 @@
    + + + org.apache.maven.plugins + maven-assembly-plugin + + + package + + single + + + + + + src + + + From 2f63ab9daf9236e8634e12126add0373688adc80 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Jan 2024 10:47:46 -0500 Subject: [PATCH 119/570] MINOR: [Java] Bump com.google.guava:guava-bom from 32.1.3-jre to 33.0.0-jre in /java (#39411) Bumps [com.google.guava:guava-bom](https://github.com/google/guava) from 32.1.3-jre to 33.0.0-jre.
    Release notes

    Sourced from com.google.guava:guava-bom's releases.

    33.0.0

    Maven

    <dependency>
      <groupId>com.google.guava</groupId>
      <artifactId>guava</artifactId>
      <version>33.0.0-jre</version>
      <!-- or, for Android: -->
      <version>33.0.0-android</version>
    </dependency>
    

    Changelog

    • This version of guava-android contains some package-private methods whose signature includes the Java 8 Collector API. This is a test to identify any problems before we expose those methods publicly to users. Please report any problems that you encounter. (73dbf7ef26)
    • Changed various classes to catch Exception instead of RuntimeException even when only RuntimeException is theoretically possible. This can help code that throws undeclared exceptions, as some bytecode rewriters (e.g., Robolectric) and languages (e.g., Kotlin) do. (c294c23760, 747924e, b2baf48)
    • Added an Automatic-Module-Name to failureaccess, Guava's one strong runtime dependency. (280b5d2f60)
    • reflect: In guava-android only, removed Invokable.getAnnotatedReturnType() and Parameter.getAnnotatedType(). These methods never worked in an Android VM, and to reflect that, they were born @ Deprecated, @ Beta, and @ DoNotCall. They're now preventing us from rolling out some new Android compatibility testing. This is the only binary-incompatible change in this release, and it should have no effect in practice. Still, we bump the major version number to follow Semantic Versioning. (045cd8428f)
    • util.concurrent: Changed our implementations to avoid eagerly initializing loggers during class loading. This can help performance, especially under Android. (4fe1df56bd)
    Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index 523e5642720cd..522ee4abc7669 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -32,7 +32,7 @@ 1.9.0 5.10.1 2.0.9 - 32.1.3-jre + 33.0.0-jre 4.1.104.Final 1.60.0 3.23.1 From 984eb3838e853a6a862678fb3faed907cd3d05eb Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 2 Jan 2024 12:14:14 -0800 Subject: [PATCH 120/570] GH-39430: [C++][ORC] Upgrade ORC to 1.9.2 (#39431) ### Rationale for this change This PR aims to bring the latest bug fixes - https://orc.apache.org/news/2023/11/10/ORC-1.9.2/ - [ORC-1525 Fix bad read in RleDecoderV2::readByte](https://issues.apache.org/jira/browse/ORC-1525) - https://orc.apache.org/news/2023/08/16/ORC-1.9.1/ - [ORC-1462 Bump aircompressor to 0.25 to fix JDK-8081450](https://issues.apache.org/jira/browse/ORC-1462) ### What changes are included in this PR? This PR upgrades ORC dependency from 1.9.0 to 1.9.2. ### Are these changes tested? Pass the CIs. ### Are there any user-facing changes? No. * Closes: #39430 Authored-by: Dongjoon Hyun Signed-off-by: David Li --- cpp/thirdparty/versions.txt | 4 ++-- java/adapter/orc/pom.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 01cb836ea2a86..e9df0c8d7566b 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -90,8 +90,8 @@ ARROW_OPENTELEMETRY_BUILD_VERSION=v1.8.1 ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM=3d640201594b07f08dade9cd1017bd0b59674daca26223b560b9bb6bf56264c2 ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION=v0.17.0 ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM=f269fbcb30e17b03caa1decd231ce826e59d7651c0f71c3b28eb5140b4bb5412 -ARROW_ORC_BUILD_VERSION=1.9.0 -ARROW_ORC_BUILD_SHA256_CHECKSUM=0dca8bbccdb2ee87e59ba964933436beebd02ea78c4134424828a8127fbc4faa +ARROW_ORC_BUILD_VERSION=1.9.2 +ARROW_ORC_BUILD_SHA256_CHECKSUM=7f46f2c184ecefd6791f1a53fb062286818bd8710c3f08b94dd3cac365e240ee ARROW_PROTOBUF_BUILD_VERSION=v21.3 ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=2f723218f6cb709ae4cdc4fb5ed56a5951fc5d466f0128ce4c946b8c78c8c49f # Because of https://github.com/Tencent/rapidjson/pull/1323, we require diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index 803ae5a33826f..a42a458e2072a 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -34,7 +34,7 @@ org.apache.orc orc-core - 1.9.0 + 1.9.2 test From 3d1324e86231fbf6799ba5ea22604072857776b1 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Wed, 3 Jan 2024 10:53:00 +0200 Subject: [PATCH 121/570] GH-39255: [JS] Allow customization of schema when passing vectors to table constructor (#39256) Merge after #39254. 
* Closes: #39255 --- js/src/builder/largebinary.ts | 2 +- js/src/table.ts | 6 ++++-- js/test/unit/table-tests.ts | 17 +++++++++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/js/src/builder/largebinary.ts b/js/src/builder/largebinary.ts index 59aa7144d20a1..f737349ac1c49 100644 --- a/js/src/builder/largebinary.ts +++ b/js/src/builder/largebinary.ts @@ -24,7 +24,7 @@ import { VariableWidthBuilder, BuilderOptions } from '../builder.js'; export class LargeBinaryBuilder extends VariableWidthBuilder { constructor(opts: BuilderOptions) { super(opts); - this._values = new BufferBuilder(new Uint8Array(0)); + this._values = new BufferBuilder(Uint8Array); } public get byteLength(): number { let size = this._pendingLength + (this.length * 4); diff --git a/js/src/table.ts b/js/src/table.ts index 58518257b30cb..00f4a4cfe0a14 100644 --- a/js/src/table.ts +++ b/js/src/table.ts @@ -73,6 +73,8 @@ export class Table { constructor(...batches: readonly RecordBatch[]); constructor(...columns: { [P in keyof T]: Vector }[]); constructor(...columns: { [P in keyof T]: Data | DataProps }[]); + constructor(schema: Schema, ...columns: { [P in keyof T]: Vector }[]); + constructor(schema: Schema, ...columns: { [P in keyof T]: Data | DataProps }[]); constructor(schema: Schema, data?: RecordBatch | RecordBatch[]); constructor(schema: Schema, data?: RecordBatch | RecordBatch[], offsets?: Uint32Array); constructor(...args: any[]) { @@ -112,8 +114,8 @@ export class Table { } else if (typeof x === 'object') { const keys = Object.keys(x) as (keyof T)[]; const vecs = keys.map((k) => new Vector([x[k]])); - const schema = new Schema(keys.map((k, i) => new Field(String(k), vecs[i].type, vecs[i].nullCount > 0))); - const [, batches] = distributeVectorsIntoRecordBatches(schema, vecs); + const batchSchema = schema ?? new Schema(keys.map((k, i) => new Field(String(k), vecs[i].type, vecs[i].nullCount > 0))); + const [, batches] = distributeVectorsIntoRecordBatches(batchSchema, vecs); return batches.length === 0 ? [new RecordBatch(x)] : batches; } } diff --git a/js/test/unit/table-tests.ts b/js/test/unit/table-tests.ts index 6b34124abcaba..094988c052b6e 100644 --- a/js/test/unit/table-tests.ts +++ b/js/test/unit/table-tests.ts @@ -151,6 +151,23 @@ describe(`Table`, () => { expect(i32).toEqualVector(makeVector(i32s)); }); + test(`creates a new Table from a Typed Array and force nullable`, () => { + const i32s = new Int32Array(arange(new Array(10))); + const i32 = makeVector([i32s]); + expect(i32).toHaveLength(i32s.length); + expect(i32.nullCount).toBe(0); + + const table = new Table(new Schema([new Field('i32', new Int32, true)]), { i32 }); + const i32Field = table.schema.fields[0]; + + expect(i32Field.name).toBe('i32'); + expect(i32).toHaveLength(i32s.length); + expect(i32Field.nullable).toBe(true); + expect(i32.nullCount).toBe(0); + + expect(i32).toEqualVector(makeVector(i32s)); + }); + test(`creates a new Table from Typed Arrays`, () => { const i32s = new Int32Array(arange(new Array(10))); const f32s = new Float32Array(arange(new Array(10))); From d75269f9ee85f5dea736192fdef9f831cb518879 Mon Sep 17 00:00:00 2001 From: John Date: Wed, 3 Jan 2024 17:35:41 +0800 Subject: [PATCH 122/570] MINOR: [Docs] Add an empty line to make `.. code-block::` work correctly (#39388) ### Rationale for this change Code block [here](https://arrow.apache.org/docs/developers/java/development.html#unit-testing) didn't work correctly. Added a empty line to make it work well. ### What changes are included in this PR? 
Added a empty line to make it work correctly. ### Are these changes tested? No. ### Are there any user-facing changes? No. Authored-by: John Signed-off-by: AlenkaF --- docs/source/developers/java/development.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/developers/java/development.rst b/docs/source/developers/java/development.rst index f7b19d73da2e2..261cd5702ae07 100644 --- a/docs/source/developers/java/development.rst +++ b/docs/source/developers/java/development.rst @@ -42,6 +42,7 @@ Unit Testing Unit tests are run by Maven during the build. To speed up the build, you can skip them by passing -DskipTests. + .. code-block:: $ cd arrow/java From fe38d0e1ee16662e66784f715c2e8179855ee803 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 3 Jan 2024 11:34:53 +0100 Subject: [PATCH 123/570] GH-39425: [CI] Fix import to match new substrait repo structure (#39426) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Upstream substrait repo did a small refactor. We have to update our imports to match the new structure. ### What changes are included in this PR? Update import ### Are these changes tested? Via archery ### Are there any user-facing changes? No * Closes: #39425 Authored-by: Raúl Cumplido Signed-off-by: Raúl Cumplido --- ci/scripts/integration_substrait.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/integration_substrait.sh b/ci/scripts/integration_substrait.sh index f7208ae113814..164f0e80b9890 100755 --- a/ci/scripts/integration_substrait.sh +++ b/ci/scripts/integration_substrait.sh @@ -24,7 +24,7 @@ set -e echo "Substrait Integration Tests" echo "Validating imports" python -c "import pyarrow.substrait" -python -c "from substrait_consumer.consumers import AceroConsumer" +python -c "from substrait_consumer.consumers.acero_consumer import AceroConsumer" echo "Executing pytest" cd consumer-testing From 213cadbbc080399b372291f93aaaa05fe0e67de1 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 3 Jan 2024 11:29:15 -0500 Subject: [PATCH 124/570] GH-38458: [Go] Add ValueLen to BinaryLike interface (#39242) ### Rationale for this change Adding `ValueLen` to the `BinaryLike` interface for easy convenience of determining the length of an individual value for a Binary/String like array. ### Are these changes tested? yes * Closes: #38458 Authored-by: Matt Topol Signed-off-by: Matt Topol --- go/arrow/array/binary.go | 9 +++++++++ go/arrow/array/string.go | 17 +++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/go/arrow/array/binary.go b/go/arrow/array/binary.go index c226297da04c6..9e26de7a6d820 100644 --- a/go/arrow/array/binary.go +++ b/go/arrow/array/binary.go @@ -30,6 +30,7 @@ import ( type BinaryLike interface { arrow.Array + ValueLen(int) int ValueBytes() []byte ValueOffset64(int) int64 } @@ -367,6 +368,11 @@ func (a *BinaryView) Value(i int) []byte { return buf.Bytes()[start : start+int32(s.Len())] } +func (a *BinaryView) ValueLen(i int) int { + s := a.ValueHeader(i) + return s.Len() +} + // ValueString returns the value at index i as a string instead of // a byte slice, without copying the underlying data. 
func (a *BinaryView) ValueString(i int) string { @@ -441,4 +447,7 @@ var ( _ arrow.Array = (*Binary)(nil) _ arrow.Array = (*LargeBinary)(nil) _ arrow.Array = (*BinaryView)(nil) + + _ BinaryLike = (*Binary)(nil) + _ BinaryLike = (*LargeBinary)(nil) ) diff --git a/go/arrow/array/string.go b/go/arrow/array/string.go index 90a4628f0d0fb..c8517ba3056df 100644 --- a/go/arrow/array/string.go +++ b/go/arrow/array/string.go @@ -31,6 +31,7 @@ import ( type StringLike interface { arrow.Array Value(int) string + ValueLen(int) int } // String represents an immutable sequence of variable-length UTF-8 strings. @@ -225,6 +226,14 @@ func (a *LargeString) ValueOffset64(i int) int64 { return a.ValueOffset(i) } +func (a *LargeString) ValueLen(i int) int { + if i < 0 || i >= a.array.data.length { + panic("arrow/array: index out of range") + } + beg := a.array.data.offset + i + return int(a.offsets[beg+1] - a.offsets[beg]) +} + func (a *LargeString) ValueOffsets() []int64 { beg := a.array.data.offset end := beg + a.array.data.length + 1 @@ -364,6 +373,11 @@ func (a *StringView) Value(i int) string { return *(*string)(unsafe.Pointer(&value)) } +func (a *StringView) ValueLen(i int) int { + s := a.ValueHeader(i) + return s.Len() +} + func (a *StringView) String() string { var o strings.Builder o.WriteString("[") @@ -698,4 +712,7 @@ var ( _ StringLikeBuilder = (*StringBuilder)(nil) _ StringLikeBuilder = (*LargeStringBuilder)(nil) _ StringLikeBuilder = (*StringViewBuilder)(nil) + _ StringLike = (*String)(nil) + _ StringLike = (*LargeString)(nil) + _ StringLike = (*StringView)(nil) ) From 0e597ab1ac62f12a4cf020994b2097643fdb9657 Mon Sep 17 00:00:00 2001 From: LucasG0 <44552904+LucasG0@users.noreply.github.com> Date: Thu, 4 Jan 2024 00:12:24 +0100 Subject: [PATCH 125/570] GH-34316: [Python] FixedSizeListArray.from_arrays supports mask parameter (#39396) ### What changes are included in this PR? Add `mask` / `null_bitmap` parameters in corresponding Cython / C++ `FixedSizeListArray` methods, and propagate this bitmap instead of using the current dummy `validity_buf`. ### Are these changes tested? Yes ### Are there any user-facing changes? 
Yes, `mask` parameter has been added to `FixedSizeListArray.from_arrays` * Closes: #34316 Authored-by: LucasG0 Signed-off-by: Will Jones --- cpp/src/arrow/array/array_nested.cc | 16 ++++++++-------- cpp/src/arrow/array/array_nested.h | 16 ++++++++++++---- python/pyarrow/array.pxi | 13 +++++++++---- python/pyarrow/includes/libarrow.pxd | 8 ++++++-- python/pyarrow/tests/test_array.py | 10 ++++++++++ 5 files changed, 45 insertions(+), 18 deletions(-) diff --git a/cpp/src/arrow/array/array_nested.cc b/cpp/src/arrow/array/array_nested.cc index acdd0a0742468..0b0e340a67d4e 100644 --- a/cpp/src/arrow/array/array_nested.cc +++ b/cpp/src/arrow/array/array_nested.cc @@ -894,7 +894,8 @@ const std::shared_ptr& FixedSizeListArray::value_type() const { const std::shared_ptr& FixedSizeListArray::values() const { return values_; } Result> FixedSizeListArray::FromArrays( - const std::shared_ptr& values, int32_t list_size) { + const std::shared_ptr& values, int32_t list_size, + std::shared_ptr null_bitmap, int64_t null_count) { if (list_size <= 0) { return Status::Invalid("list_size needs to be a strict positive integer"); } @@ -905,14 +906,14 @@ Result> FixedSizeListArray::FromArrays( } int64_t length = values->length() / list_size; auto list_type = std::make_shared(values->type(), list_size); - std::shared_ptr validity_buf; - return std::make_shared(list_type, length, values, validity_buf, - /*null_count=*/0, /*offset=*/0); + return std::make_shared(list_type, length, values, null_bitmap, + null_count); } Result> FixedSizeListArray::FromArrays( - const std::shared_ptr& values, std::shared_ptr type) { + const std::shared_ptr& values, std::shared_ptr type, + std::shared_ptr null_bitmap, int64_t null_count) { if (type->id() != Type::FIXED_SIZE_LIST) { return Status::TypeError("Expected fixed size list type, got ", type->ToString()); } @@ -926,10 +927,9 @@ Result> FixedSizeListArray::FromArrays( "The length of the values Array needs to be a multiple of the list size"); } int64_t length = values->length() / list_type.list_size(); - std::shared_ptr validity_buf; - return std::make_shared(type, length, values, validity_buf, - /*null_count=*/0, /*offset=*/0); + return std::make_shared(type, length, values, null_bitmap, + null_count); } Result> FixedSizeListArray::Flatten( diff --git a/cpp/src/arrow/array/array_nested.h b/cpp/src/arrow/array/array_nested.h index 61606e1592d61..768a630e0af54 100644 --- a/cpp/src/arrow/array/array_nested.h +++ b/cpp/src/arrow/array/array_nested.h @@ -599,17 +599,25 @@ class ARROW_EXPORT FixedSizeListArray : public Array { /// /// \param[in] values Array containing list values /// \param[in] list_size The fixed length of each list + /// \param[in] null_bitmap Optional validity bitmap + /// \param[in] null_count Optional null count in null_bitmap /// \return Will have length equal to values.length() / list_size - static Result> FromArrays(const std::shared_ptr& values, - int32_t list_size); + static Result> FromArrays( + const std::shared_ptr& values, int32_t list_size, + std::shared_ptr null_bitmap = NULLPTR, + int64_t null_count = kUnknownNullCount); /// \brief Construct FixedSizeListArray from child value array and type /// /// \param[in] values Array containing list values /// \param[in] type The fixed sized list type + /// \param[in] null_bitmap Optional validity bitmap + /// \param[in] null_count Optional null count in null_bitmap /// \return Will have length equal to values.length() / type.list_size() - static Result> FromArrays(const std::shared_ptr& values, - std::shared_ptr 
type); + static Result> FromArrays( + const std::shared_ptr& values, std::shared_ptr type, + std::shared_ptr null_bitmap = NULLPTR, + int64_t null_count = kUnknownNullCount); protected: void SetData(const std::shared_ptr& data); diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 74a196002bfa6..751dfbcce4342 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -2484,7 +2484,7 @@ cdef class MapArray(ListArray): Examples -------- - First, let's understand the structure of our dataset when viewed in a rectangular data model. + First, let's understand the structure of our dataset when viewed in a rectangular data model. The total of 5 respondents answered the question "How much did you like the movie x?". The value -1 in the integer array means that the value is missing. The boolean array represents the null bitmask corresponding to the missing values in the integer array. @@ -2590,7 +2590,7 @@ cdef class FixedSizeListArray(BaseListArray): """ @staticmethod - def from_arrays(values, list_size=None, DataType type=None): + def from_arrays(values, list_size=None, DataType type=None, mask=None): """ Construct FixedSizeListArray from array of values and a list length. @@ -2602,6 +2602,9 @@ cdef class FixedSizeListArray(BaseListArray): type : DataType, optional If not specified, a default ListType with the values' type and `list_size` length is used. + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). + Returns ------- @@ -2652,19 +2655,21 @@ cdef class FixedSizeListArray(BaseListArray): _values = asarray(values) + c_mask = c_mask_inverted_from_obj(mask, None) + if type is not None: if list_size is not None: raise ValueError("Cannot specify both list_size and type") with nogil: c_result = CFixedSizeListArray.FromArraysAndType( - _values.sp_array, type.sp_type) + _values.sp_array, type.sp_type, c_mask) else: if list_size is None: raise ValueError("Should specify one of list_size and type") _list_size = list_size with nogil: c_result = CFixedSizeListArray.FromArrays( - _values.sp_array, _list_size) + _values.sp_array, _list_size, c_mask) cdef Array result = pyarrow_wrap_array(GetResultValue(c_result)) result.validate() return result diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index bad5ec606c268..82b888f584813 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -673,11 +673,15 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CFixedSizeListArray" arrow::FixedSizeListArray"(CArray): @staticmethod CResult[shared_ptr[CArray]] FromArrays( - const shared_ptr[CArray]& values, int32_t list_size) + const shared_ptr[CArray]& values, + int32_t list_size, + shared_ptr[CBuffer] null_bitmap) @staticmethod CResult[shared_ptr[CArray]] FromArraysAndType" FromArrays"( - const shared_ptr[CArray]& values, shared_ptr[CDataType]) + const shared_ptr[CArray]& values, + shared_ptr[CDataType], + shared_ptr[CBuffer] null_bitmap) int64_t value_offset(int i) int64_t value_length(int i) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 599d15d023a55..d598630dc2103 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -1091,6 +1091,16 @@ def test_fixed_size_list_from_arrays(): assert result.type.equals(typ) assert result.type.value_field.name == "name" + result = pa.FixedSizeListArray.from_arrays(values, + type=typ, + mask=pa.array([False, True, False])) + 
assert result.to_pylist() == [[0, 1, 2, 3], None, [8, 9, 10, 11]] + + result = pa.FixedSizeListArray.from_arrays(values, + list_size=4, + mask=pa.array([False, True, False])) + assert result.to_pylist() == [[0, 1, 2, 3], None, [8, 9, 10, 11]] + # raise on invalid values / list_size with pytest.raises(ValueError): pa.FixedSizeListArray.from_arrays(values, -4) From 5c0fa712faec0b2997b5970890c076011f96de77 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Thu, 4 Jan 2024 03:12:04 +0200 Subject: [PATCH 126/570] GH-39435: [JS] Add Vector.nullable (#39436) --- js/src/table.ts | 2 +- js/src/util/chunk.ts | 5 +++++ js/src/vector.ts | 8 ++++++++ js/test/unit/table-tests.ts | 18 ++++++++++-------- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/js/src/table.ts b/js/src/table.ts index 00f4a4cfe0a14..e719b7ca9d313 100644 --- a/js/src/table.ts +++ b/js/src/table.ts @@ -114,7 +114,7 @@ export class Table { } else if (typeof x === 'object') { const keys = Object.keys(x) as (keyof T)[]; const vecs = keys.map((k) => new Vector([x[k]])); - const batchSchema = schema ?? new Schema(keys.map((k, i) => new Field(String(k), vecs[i].type, vecs[i].nullCount > 0))); + const batchSchema = schema ?? new Schema(keys.map((k, i) => new Field(String(k), vecs[i].type, vecs[i].nullable))); const [, batches] = distributeVectorsIntoRecordBatches(batchSchema, vecs); return batches.length === 0 ? [new RecordBatch(x)] : batches; } diff --git a/js/src/util/chunk.ts b/js/src/util/chunk.ts index 6098b04243422..36620627f197d 100644 --- a/js/src/util/chunk.ts +++ b/js/src/util/chunk.ts @@ -51,6 +51,11 @@ export class ChunkedIterator implements IterableIterator(chunks: ReadonlyArray>) { + return chunks.some(chunk => chunk.nullable); +} + /** @ignore */ export function computeChunkNullCounts(chunks: ReadonlyArray>) { return chunks.reduce((nullCount, chunk) => nullCount + chunk.nullCount, 0); diff --git a/js/src/vector.ts b/js/src/vector.ts index 7e1caa343562c..8b94b14e3fff7 100644 --- a/js/src/vector.ts +++ b/js/src/vector.ts @@ -24,6 +24,7 @@ import { BigIntArray, TypedArray, TypedArrayDataType } from './interfaces.js'; import { isChunkedValid, computeChunkOffsets, + computeChunkNullable, computeChunkNullCounts, sliceChunks, wrapChunkedCall1, @@ -132,6 +133,13 @@ export class Vector { return this.data.reduce((byteLength, data) => byteLength + data.byteLength, 0); } + /** + * Whether this Vector's elements can contain null values. + */ + public get nullable() { + return computeChunkNullable(this.data); + } + /** * The number of null elements in this Vector. 
*/ diff --git a/js/test/unit/table-tests.ts b/js/test/unit/table-tests.ts index 094988c052b6e..ffda47f473368 100644 --- a/js/test/unit/table-tests.ts +++ b/js/test/unit/table-tests.ts @@ -139,30 +139,32 @@ describe(`Table`, () => { const i32 = makeVector([i32s]); expect(i32).toHaveLength(i32s.length); expect(i32.nullCount).toBe(0); + expect(i32.nullable).toBe(true); const table = new Table({ i32 }); const i32Field = table.schema.fields[0]; expect(i32Field.name).toBe('i32'); expect(i32).toHaveLength(i32s.length); - expect(i32Field.nullable).toBe(false); + expect(i32Field.nullable).toBe(true); expect(i32.nullCount).toBe(0); expect(i32).toEqualVector(makeVector(i32s)); }); - test(`creates a new Table from a Typed Array and force nullable`, () => { + test(`creates a new Table from a Typed Array and force not nullable`, () => { const i32s = new Int32Array(arange(new Array(10))); const i32 = makeVector([i32s]); expect(i32).toHaveLength(i32s.length); expect(i32.nullCount).toBe(0); + expect(i32.nullable).toBe(true); - const table = new Table(new Schema([new Field('i32', new Int32, true)]), { i32 }); + const table = new Table(new Schema([new Field('i32', new Int32, false)]), { i32 }); const i32Field = table.schema.fields[0]; expect(i32Field.name).toBe('i32'); expect(i32).toHaveLength(i32s.length); - expect(i32Field.nullable).toBe(true); + expect(i32Field.nullable).toBe(false); expect(i32.nullCount).toBe(0); expect(i32).toEqualVector(makeVector(i32s)); @@ -187,8 +189,8 @@ describe(`Table`, () => { expect(f32Field.name).toBe('f32'); expect(i32).toHaveLength(i32s.length); expect(f32).toHaveLength(f32s.length); - expect(i32Field.nullable).toBe(false); - expect(f32Field.nullable).toBe(false); + expect(i32Field.nullable).toBe(true); + expect(f32Field.nullable).toBe(true); expect(i32.nullCount).toBe(0); expect(f32.nullCount).toBe(0); @@ -222,7 +224,7 @@ describe(`Table`, () => { expect(i32Vector).toHaveLength(i32s.length); expect(f32Vector).toHaveLength(i32s.length); // new length should be the same as the longest sibling - expect(i32Field.nullable).toBe(false); + expect(i32Field.nullable).toBe(true); expect(f32Field.nullable).toBe(true); // true, with 12 additional nulls expect(i32Vector.nullCount).toBe(0); expect(f32Vector.nullCount).toBe(i32s.length - f32s.length); @@ -264,7 +266,7 @@ describe(`Table`, () => { expect(f32RenamedField.name).toBe('f32Renamed'); expect(i32Renamed).toHaveLength(i32s.length); expect(f32Renamed).toHaveLength(i32s.length); // new length should be the same as the longest sibling - expect(i32RenamedField.nullable).toBe(false); + expect(i32RenamedField.nullable).toBe(true); expect(f32RenamedField.nullable).toBe(true); // true, with 4 additional nulls expect(i32Renamed.nullCount).toBe(0); expect(f32Renamed.nullCount).toBe(i32s.length - f32s.length); From 27d72f3a773ddbb8dd5ee679b9ed6b555a2bb8ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 4 Jan 2024 11:49:04 +0100 Subject: [PATCH 127/570] GH-39421: [CI][Ruby] Update to using Ubuntu 22.04 on test-ruby and test-c-glib nightly jobs (#39422) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change CI Jobs for Ruby and c-glib are failing on Ubuntu due to bundler failing to install on old Ruby. ### What changes are included in this PR? Use Ubuntu 22.04 on those jobs. ### Are these changes tested? Via Archery ### Are there any user-facing changes? 
No * Closes: #39421 Authored-by: Raúl Cumplido Signed-off-by: Raúl Cumplido --- dev/tasks/tasks.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index ed6ea08894f10..04faef427e281 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1032,6 +1032,8 @@ tasks: ci: github template: docker-tests/github.linux.yml params: + env: + UBUNTU: 22.04 image: {{ image }} {% endfor %} From ccc674c56f3473c9556a5af96dff9d156f559663 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Thu, 4 Jan 2024 12:57:25 -0500 Subject: [PATCH 128/570] GH-38964: [C++] Fix spelling (compute) (#38965) ### Rationale for this change ### What changes are included in this PR? Spelling fixes to cpp/src/arrow/compute/ ### Are these changes tested? ### Are there any user-facing changes? * Closes: #38964 Authored-by: Josh Soref <2119212+jsoref@users.noreply.github.com> Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/compute/api_aggregate.h | 2 +- cpp/src/arrow/compute/api_scalar.h | 4 +-- cpp/src/arrow/compute/api_vector.h | 6 ++-- cpp/src/arrow/compute/exec.cc | 2 +- cpp/src/arrow/compute/exec_internal.h | 2 +- cpp/src/arrow/compute/exec_test.cc | 2 +- .../arrow/compute/kernels/aggregate_basic.cc | 2 +- .../kernels/aggregate_basic_internal.h | 2 +- .../arrow/compute/kernels/aggregate_mode.cc | 2 +- .../compute/kernels/aggregate_quantile.cc | 2 +- .../arrow/compute/kernels/aggregate_test.cc | 4 +-- .../arrow/compute/kernels/hash_aggregate.cc | 4 +-- .../kernels/scalar_arithmetic_benchmark.cc | 2 +- .../compute/kernels/scalar_arithmetic_test.cc | 2 +- .../arrow/compute/kernels/scalar_cast_test.cc | 8 ++--- .../compute/kernels/scalar_if_else_test.cc | 2 +- cpp/src/arrow/compute/kernels/scalar_round.cc | 2 +- .../compute/kernels/scalar_string_internal.h | 2 +- .../compute/kernels/scalar_string_test.cc | 4 +-- .../compute/kernels/scalar_temporal_test.cc | 14 ++++---- .../compute/kernels/vector_run_end_encode.cc | 12 +++---- .../arrow/compute/kernels/vector_select_k.cc | 32 +++++++++---------- .../compute/kernels/vector_selection_test.cc | 2 +- cpp/src/arrow/compute/key_map.cc | 4 +-- cpp/src/arrow/compute/key_map.h | 4 +-- cpp/src/arrow/compute/key_map_avx2.cc | 2 +- cpp/src/arrow/compute/light_array.cc | 2 +- cpp/src/arrow/compute/light_array_test.cc | 2 +- cpp/src/arrow/compute/ordering.h | 2 +- cpp/src/arrow/compute/registry_test.cc | 2 +- cpp/src/arrow/compute/row/grouper.cc | 2 +- cpp/src/arrow/compute/row/grouper.h | 10 +++--- 32 files changed, 73 insertions(+), 73 deletions(-) diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h index 4d2c814a69bbb..2e5210b073ee4 100644 --- a/cpp/src/arrow/compute/api_aggregate.h +++ b/cpp/src/arrow/compute/api_aggregate.h @@ -452,7 +452,7 @@ Result TDigest(const Datum& value, /// \brief Find the first index of a value in an array. /// /// \param[in] value The array to search. -/// \param[in] options The array to search for. See IndexOoptions. +/// \param[in] options The array to search for. See IndexOptions. /// \param[in] ctx the function execution context, optional /// \return out a Scalar containing the index (or -1 if not found). 
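/// A minimal usage sketch (illustrative only; the input array and the
/// searched-for value below are assumed, not part of this change):
///
///   std::shared_ptr<arrow::Array> haystack = ...;  // e.g. an int64 array
///   arrow::compute::IndexOptions opts(arrow::MakeScalar(int64_t{42}));
///   ARROW_ASSIGN_OR_RAISE(arrow::Datum found,
///                         arrow::compute::Index(haystack, opts));
///   // `found` wraps an Int64Scalar holding the first index of 42, or -1
///   // if the value is absent.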
/// diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 26fbe64f74293..bad34f4a37881 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -491,7 +491,7 @@ struct ARROW_EXPORT AssumeTimezoneOptions : public FunctionOptions { /// How to interpret ambiguous local times (due to DST shifts) Ambiguous ambiguous; - /// How to interpret non-existent local times (due to DST shifts) + /// How to interpret nonexistent local times (due to DST shifts) Nonexistent nonexistent; }; @@ -1589,7 +1589,7 @@ ARROW_EXPORT Result MonthsBetween(const Datum& left, const Datum& right, ARROW_EXPORT Result WeeksBetween(const Datum& left, const Datum& right, ExecContext* ctx = NULLPTR); -/// \brief Month Day Nano Between finds the number of months, days, and nonaseconds +/// \brief Month Day Nano Between finds the number of months, days, and nanoseconds /// between two values /// /// \param[in] left input treated as the start time diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 759f9e5c1a408..919572f16ee69 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -401,7 +401,7 @@ Result> NthToIndices(const Array& values, int64_t n, /// \brief Return indices that partition an array around n-th sorted element. /// -/// This overload takes a PartitionNthOptions specifiying the pivot index +/// This overload takes a PartitionNthOptions specifying the pivot index /// and the null handling. /// /// \param[in] values array to be partitioned @@ -452,7 +452,7 @@ Result> SortIndices(const Array& array, /// \brief Return the indices that would sort an array. /// -/// This overload takes a ArraySortOptions specifiying the sort order +/// This overload takes a ArraySortOptions specifying the sort order /// and the null handling. /// /// \param[in] array array to sort @@ -486,7 +486,7 @@ Result> SortIndices(const ChunkedArray& chunked_array, /// \brief Return the indices that would sort a chunked array. /// -/// This overload takes a ArraySortOptions specifiying the sort order +/// This overload takes a ArraySortOptions specifying the sort order /// and the null handling. /// /// \param[in] chunked_array chunked array to sort diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc index c18dfa0952245..28dcf493fa294 100644 --- a/cpp/src/arrow/compute/exec.cc +++ b/cpp/src/arrow/compute/exec.cc @@ -1164,7 +1164,7 @@ class ScalarAggExecutor : public KernelExecutorImpl { // TODO(wesm): this is odd and should be examined soon -- only one state // "should" be needed per thread of execution - // FIXME(ARROW-11840) don't merge *any* aggegates for every batch + // FIXME(ARROW-11840) don't merge *any* aggregates for every batch ARROW_ASSIGN_OR_RAISE(auto batch_state, kernel_->init(kernel_ctx_, {kernel_, *input_types_, options_})); diff --git a/cpp/src/arrow/compute/exec_internal.h b/cpp/src/arrow/compute/exec_internal.h index 8beff2a6c63ac..7e4f364a9288e 100644 --- a/cpp/src/arrow/compute/exec_internal.h +++ b/cpp/src/arrow/compute/exec_internal.h @@ -46,7 +46,7 @@ class ARROW_EXPORT ExecSpanIterator { public: ExecSpanIterator() = default; - /// \brief Initialize itertor iterator and do basic argument validation + /// \brief Initialize iterator and do basic argument validation /// /// \param[in] batch the input ExecBatch /// \param[in] max_chunksize the maximum length of each ExecSpan. 
Depending diff --git a/cpp/src/arrow/compute/exec_test.cc b/cpp/src/arrow/compute/exec_test.cc index d661e5735fea6..cfce0c57fa416 100644 --- a/cpp/src/arrow/compute/exec_test.cc +++ b/cpp/src/arrow/compute/exec_test.cc @@ -1232,7 +1232,7 @@ void TestCallScalarFunctionPreallocationCases::DoTest(FunctionCallerMaker caller } // Set the exec_chunksize to be smaller, so now we have several invocations - // of the kernel, but still the output is onee array + // of the kernel, but still the output is one array { std::vector args = {Datum(arr)}; exec_ctx_->set_exec_chunksize(80); diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc index ddd241652460e..1fbcd6a249093 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc @@ -1100,7 +1100,7 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) { AddFirstLastKernels(FirstLastInit, TemporalTypes(), func.get()); DCHECK_OK(registry->AddFunction(std::move(func))); - // Add first/last as convience functions + // Add first/last as convenience functions func = std::make_shared("first", Arity::Unary(), first_doc, &default_scalar_aggregate_options); AddFirstOrLastAggKernel(func.get(), first_last_func); diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h index 4966e9871d62c..f08e7aaa538bb 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h +++ b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h @@ -472,7 +472,7 @@ struct FirstLastImpl : public ScalarAggregator { this->count += arr.length() - null_count; if (null_count == 0) { - // If there are no null valus, we can just merge + // If there are no null values, we can just merge // the first and last element this->state.MergeOne(arr.GetView(0)); this->state.MergeOne(arr.GetView(arr.length() - 1)); diff --git a/cpp/src/arrow/compute/kernels/aggregate_mode.cc b/cpp/src/arrow/compute/kernels/aggregate_mode.cc index 7f359ead6cb83..3f84c0a5ee4c4 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_mode.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_mode.cc @@ -115,7 +115,7 @@ Status Finalize(KernelContext* ctx, const DataType& type, ExecResult* out, return Status::OK(); } -// count value occurances for integers with narrow value range +// count value occurrences for integers with narrow value range // O(1) space, O(n) time template struct CountModer { diff --git a/cpp/src/arrow/compute/kernels/aggregate_quantile.cc b/cpp/src/arrow/compute/kernels/aggregate_quantile.cc index e675a1cec86c9..f4826229dd46c 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_quantile.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_quantile.cc @@ -120,7 +120,7 @@ struct SortQuantiler { }); // input array is partitioned around data point at `last_index` (pivot) - // for next quatile which is smaller, we only consider inputs left of the pivot + // for next quantile which is smaller, we only consider inputs left of the pivot uint64_t last_index = in_buffer.size(); if (is_datapoint) { CType* out_buffer = out_data->template GetMutableValues(1); diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc index aa19fb3401232..65439af2748b5 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_test.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc @@ -478,7 +478,7 @@ TEST_F(TestSumKernelRoundOff, Basics) { // array = np.arange(321000, dtype='float64') // 
array -= np.mean(array) - // array *= arrray + // array *= array double index = 0; ASSERT_OK_AND_ASSIGN( auto array, ArrayFromBuilderVisitor( @@ -3653,7 +3653,7 @@ class TestPrimitiveQuantileKernel : public ::testing::Test { #define INTYPE(x) Datum(static_cast(x)) #define DOUBLE(x) Datum(static_cast(x)) -// output type per interplation: linear, lower, higher, nearest, midpoint +// output type per interpolation: linear, lower, higher, nearest, midpoint #define O(a, b, c, d, e) \ { DOUBLE(a), INTYPE(b), INTYPE(c), INTYPE(d), DOUBLE(e) } diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc index 47cae538e2e3f..c37e45513d040 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc @@ -1848,8 +1848,8 @@ struct GroupedFirstLastImpl final : public GroupedAggregator { const ArrayData& group_id_mapping) override { // The merge is asymmetric. "first" from this state gets pick over "first" from other // state. "last" from other state gets pick over from this state. This is so that when - // using with segmeneted aggregation, we still get the correct "first" and "last" - // value for the entire segement. + // using with segmented aggregation, we still get the correct "first" and "last" + // value for the entire segment. auto other = checked_cast(&raw_other); auto raw_firsts = firsts_.mutable_data(); diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_benchmark.cc index 4b678da5f1b42..17e9951d69bc2 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_benchmark.cc @@ -33,7 +33,7 @@ constexpr auto kSeed = 0x94378165; using BinaryOp = Result(const Datum&, const Datum&, ArithmeticOptions, ExecContext*); -// Add explicit overflow-checked shortcuts, for easy benchmark parametering. +// Add explicit overflow-checked shortcuts, for easy benchmark parameterizing. 
static Result AddChecked(const Datum& left, const Datum& right, ArithmeticOptions options = ArithmeticOptions(), ExecContext* ctx = NULLPTR) { diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc index 756b3028c4a59..37a1bcbc02d73 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc @@ -1857,7 +1857,7 @@ TEST_F(TestBinaryArithmeticDecimal, DispatchBest) { } } -// reference result from bc (precsion=100, scale=40) +// reference result from bc (precision=100, scale=40) TEST_F(TestBinaryArithmeticDecimal, AddSubtract) { // array array, decimal128 { diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index b429c8175b020..a8acf68f66c8b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2842,19 +2842,19 @@ TEST(Cast, StructToDifferentNullabilityStruct) { ::testing::HasSubstr("cannot cast nullable field to non-nullable field"), Cast(src_nullable, options1_non_nullable)); - std::vector> fields_dest2_non_nullble = { + std::vector> fields_dest2_non_nullable = { std::make_shared("a", int64(), false), std::make_shared("c", int64(), false)}; - const auto dest2_non_nullable = arrow::struct_(fields_dest2_non_nullble); + const auto dest2_non_nullable = arrow::struct_(fields_dest2_non_nullable); const auto options2_non_nullable = CastOptions::Safe(dest2_non_nullable); EXPECT_RAISES_WITH_MESSAGE_THAT( TypeError, ::testing::HasSubstr("cannot cast nullable field to non-nullable field"), Cast(src_nullable, options2_non_nullable)); - std::vector> fields_dest3_non_nullble = { + std::vector> fields_dest3_non_nullable = { std::make_shared("c", int64(), false)}; - const auto dest3_non_nullable = arrow::struct_(fields_dest3_non_nullble); + const auto dest3_non_nullable = arrow::struct_(fields_dest3_non_nullable); const auto options3_non_nullable = CastOptions::Safe(dest3_non_nullable); EXPECT_RAISES_WITH_MESSAGE_THAT( TypeError, diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index 771261cac9140..c4c46b5efe84d 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -69,7 +69,7 @@ template class TestIfElsePrimitive : public ::testing::Test {}; // There are a lot of tests here if we cover all the types and it gets slow on valgrind -// so we overrdie the standard type sets with a smaller range +// so we override the standard type sets with a smaller range #ifdef ARROW_VALGRIND using IfElseNumericBasedTypes = ::testing::Types::Round(round_val); } - // Equality check is ommitted so that the common case of 10^0 (integer rounding) + // Equality check is omitted so that the common case of 10^0 (integer rounding) // uses multiply-only round_val = ndigits > 0 ? 
(round_val / pow10) : (round_val * pow10); if (!std::isfinite(round_val)) { diff --git a/cpp/src/arrow/compute/kernels/scalar_string_internal.h b/cpp/src/arrow/compute/kernels/scalar_string_internal.h index 1a9969441655d..7a5d5a7c86e85 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_internal.h +++ b/cpp/src/arrow/compute/kernels/scalar_string_internal.h @@ -306,7 +306,7 @@ struct StringSplitExec { using ListOffsetsBuilderType = TypedBufferBuilder; using State = OptionsWrapper; - // Keep the temporary storage accross individual values, to minimize reallocations + // Keep the temporary storage across individual values, to minimize reallocations std::vector parts; Options options; diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index ff14f5e7a5c5d..5dec16d89e29c 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -2060,7 +2060,7 @@ TYPED_TEST(TestStringKernels, SliceCodeunitsBasic) { this->CheckUnary("utf8_slice_codeunits", R"(["𝑓öõḍš"])", this->type(), R"([""])", &options_edgecase_1); - // this is a safeguard agains an optimization path possible, but actually a tricky case + // this is a safeguard against an optimization path possible, but actually a tricky case SliceOptions options_edgecase_2{-6, -2}; this->CheckUnary("utf8_slice_codeunits", R"(["𝑓öõḍš"])", this->type(), R"(["𝑓öõ"])", &options_edgecase_2); @@ -2189,7 +2189,7 @@ TYPED_TEST(TestBinaryKernels, SliceBytesBasic) { "ds\"]", this->type(), R"([""])", &options_edgecase_1); - // this is a safeguard agains an optimization path possible, but actually a tricky case + // this is a safeguard against an optimization path possible, but actually a tricky case SliceOptions options_edgecase_2{-6, -2}; this->CheckUnary("binary_slice", "[\"f\xc2\xa2" diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index d8bbe5ca8a34c..d4482334285bc 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -2101,9 +2101,9 @@ TEST_F(ScalarTemporalTest, StrftimeNoTimezone) { TEST_F(ScalarTemporalTest, StrftimeInvalidTimezone) { const char* seconds = R"(["1970-01-01T00:00:59", null])"; - auto arr = ArrayFromJSON(timestamp(TimeUnit::SECOND, "non-existent"), seconds); + auto arr = ArrayFromJSON(timestamp(TimeUnit::SECOND, "nonexistent"), seconds); EXPECT_RAISES_WITH_MESSAGE_THAT( - Invalid, testing::HasSubstr("Cannot locate timezone 'non-existent'"), + Invalid, testing::HasSubstr("Cannot locate timezone 'nonexistent'"), Strftime(arr, StrftimeOptions())); } @@ -2159,12 +2159,12 @@ TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { } TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) { - auto options = StrftimeOptions("%d %B %Y %H:%M:%S", "non-existent"); + auto options = StrftimeOptions("%d %B %Y %H:%M:%S", "nonexistent"); const char* seconds = R"(["1970-01-01T00:00:59", null])"; auto arr = ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), seconds); EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, - testing::HasSubstr("Cannot find locale 'non-existent'"), + testing::HasSubstr("Cannot find locale 'nonexistent'"), Strftime(arr, options)); } @@ -2601,7 +2601,7 @@ TEST_F(ScalarTemporalTestStrictCeil, TestCeilTemporalStrictCeil) { TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, CeilUTC) { std::string op = "ceil_temporal"; - // Data for tests below was generaed via lubridate with the 
exception + // Data for tests below was generated via lubridate with the exception // of week data because lubridate currently does not support rounding to // multiple of week. const char* ceil_15_nanosecond = @@ -2989,7 +2989,7 @@ TEST_F(ScalarTemporalTest, TestFloorTemporal) { TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, FloorUTC) { std::string op = "floor_temporal"; - // Data for tests below was generaed via lubridate with the exception + // Data for tests below was generated via lubridate with the exception // of week data because lubridate currently does not support rounding to // multiple of week. const char* floor_15_nanosecond = @@ -3402,7 +3402,7 @@ TEST_F(ScalarTemporalTest, TestCeilFloorRoundTemporalBrussels) { TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, RoundUTC) { std::string op = "round_temporal"; - // Data for tests below was generaed via lubridate with the exception + // Data for tests below was generated via lubridate with the exception // of week data because lubridate currently does not support rounding to // multiple of week. const char* round_15_nanosecond = diff --git a/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc b/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc index 943fdcd6b147f..811ed23e1134b 100644 --- a/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc +++ b/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc @@ -30,11 +30,11 @@ namespace compute { namespace internal { namespace { -struct RunEndEncondingState : public KernelState { - explicit RunEndEncondingState(std::shared_ptr run_end_type) +struct RunEndEncodingState : public KernelState { + explicit RunEndEncodingState(std::shared_ptr run_end_type) : run_end_type{std::move(run_end_type)} {} - ~RunEndEncondingState() override = default; + ~RunEndEncodingState() override = default; std::shared_ptr run_end_type; }; @@ -273,7 +273,7 @@ struct RunEndEncodeExec { template static Status Exec(KernelContext* ctx, const ExecSpan& span, ExecResult* result) { - auto state = checked_cast(ctx->state()); + auto state = checked_cast(ctx->state()); switch (state->run_end_type->id()) { case Type::INT16: return DoExec(ctx, span, result); @@ -290,7 +290,7 @@ struct RunEndEncodeExec { /// \brief The OutputType::Resolver of the "run_end_decode" function. static Result ResolveOutputType( KernelContext* ctx, const std::vector& input_types) { - auto state = checked_cast(ctx->state()); + auto state = checked_cast(ctx->state()); return TypeHolder(std::make_shared(state->run_end_type, input_types[0].GetSharedPtr())); } @@ -301,7 +301,7 @@ Result> RunEndEncodeInit(KernelContext*, auto* options = checked_cast(args.options); auto run_end_type = options ? 
options->run_end_type : RunEndEncodeOptions::Defaults().run_end_type; - return std::make_unique(std::move(run_end_type)); + return std::make_unique(std::move(run_end_type)); } template diff --git a/cpp/src/arrow/compute/kernels/vector_select_k.cc b/cpp/src/arrow/compute/kernels/vector_select_k.cc index 1740a9b7f0bb4..97996e6d52cc0 100644 --- a/cpp/src/arrow/compute/kernels/vector_select_k.cc +++ b/cpp/src/arrow/compute/kernels/vector_select_k.cc @@ -72,9 +72,9 @@ class SelectKComparator { } }; -class ArraySelecter : public TypeVisitor { +class ArraySelector : public TypeVisitor { public: - ArraySelecter(ExecContext* ctx, const Array& array, const SelectKOptions& options, + ArraySelector(ExecContext* ctx, const Array& array, const SelectKOptions& options, Datum* output) : TypeVisitor(), ctx_(ctx), @@ -164,9 +164,9 @@ struct TypedHeapItem { ArrayType* array; }; -class ChunkedArraySelecter : public TypeVisitor { +class ChunkedArraySelector : public TypeVisitor { public: - ChunkedArraySelecter(ExecContext* ctx, const ChunkedArray& chunked_array, + ChunkedArraySelector(ExecContext* ctx, const ChunkedArray& chunked_array, const SelectKOptions& options, Datum* output) : TypeVisitor(), chunked_array_(chunked_array), @@ -273,13 +273,13 @@ class ChunkedArraySelecter : public TypeVisitor { Datum* output_; }; -class RecordBatchSelecter : public TypeVisitor { +class RecordBatchSelector : public TypeVisitor { private: using ResolvedSortKey = ResolvedRecordBatchSortKey; using Comparator = MultipleKeyComparator; public: - RecordBatchSelecter(ExecContext* ctx, const RecordBatch& record_batch, + RecordBatchSelector(ExecContext* ctx, const RecordBatch& record_batch, const SelectKOptions& options, Datum* output) : TypeVisitor(), ctx_(ctx), @@ -391,7 +391,7 @@ class RecordBatchSelecter : public TypeVisitor { Comparator comparator_; }; -class TableSelecter : public TypeVisitor { +class TableSelector : public TypeVisitor { private: struct ResolvedSortKey { ResolvedSortKey(const std::shared_ptr& chunked_array, @@ -420,7 +420,7 @@ class TableSelecter : public TypeVisitor { using Comparator = MultipleKeyComparator; public: - TableSelecter(ExecContext* ctx, const Table& table, const SelectKOptions& options, + TableSelector(ExecContext* ctx, const Table& table, const SelectKOptions& options, Datum* output) : TypeVisitor(), ctx_(ctx), @@ -610,32 +610,32 @@ class SelectKUnstableMetaFunction : public MetaFunction { Result SelectKth(const Array& array, const SelectKOptions& options, ExecContext* ctx) const { Datum output; - ArraySelecter selecter(ctx, array, options, &output); - ARROW_RETURN_NOT_OK(selecter.Run()); + ArraySelector selector(ctx, array, options, &output); + ARROW_RETURN_NOT_OK(selector.Run()); return output; } Result SelectKth(const ChunkedArray& chunked_array, const SelectKOptions& options, ExecContext* ctx) const { Datum output; - ChunkedArraySelecter selecter(ctx, chunked_array, options, &output); - ARROW_RETURN_NOT_OK(selecter.Run()); + ChunkedArraySelector selector(ctx, chunked_array, options, &output); + ARROW_RETURN_NOT_OK(selector.Run()); return output; } Result SelectKth(const RecordBatch& record_batch, const SelectKOptions& options, ExecContext* ctx) const { ARROW_RETURN_NOT_OK(CheckConsistency(*record_batch.schema(), options.sort_keys)); Datum output; - RecordBatchSelecter selecter(ctx, record_batch, options, &output); - ARROW_RETURN_NOT_OK(selecter.Run()); + RecordBatchSelector selector(ctx, record_batch, options, &output); + ARROW_RETURN_NOT_OK(selector.Run()); return output; } Result 
SelectKth(const Table& table, const SelectKOptions& options, ExecContext* ctx) const { ARROW_RETURN_NOT_OK(CheckConsistency(*table.schema(), options.sort_keys)); Datum output; - TableSelecter selecter(ctx, table, options, &output); - ARROW_RETURN_NOT_OK(selecter.Run()); + TableSelector selector(ctx, table, options, &output); + ARROW_RETURN_NOT_OK(selector.Run()); return output; } }; diff --git a/cpp/src/arrow/compute/kernels/vector_selection_test.cc b/cpp/src/arrow/compute/kernels/vector_selection_test.cc index 30e85c1f71089..bdf9f5454fdef 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_test.cc @@ -2488,7 +2488,7 @@ TEST(TestIndicesNonZero, IndicesNonZeroBoolean) { Datum actual; std::shared_ptr result; - // boool + // bool ASSERT_OK_AND_ASSIGN( actual, CallFunction("indices_nonzero", {ArrayFromJSON(boolean(), "[null, true, false, true]")})); diff --git a/cpp/src/arrow/compute/key_map.cc b/cpp/src/arrow/compute/key_map.cc index 525dae850f19b..a027ec811cf24 100644 --- a/cpp/src/arrow/compute/key_map.cc +++ b/cpp/src/arrow/compute/key_map.cc @@ -505,7 +505,7 @@ void SwissTable::find(const int num_keys, const uint32_t* hashes, // Slow processing of input keys in the most generic case. // Handles inserting new keys. -// Pre-existing keys will be handled correctly, although the intended use is for this +// Preexisting keys will be handled correctly, although the intended use is for this // call to follow a call to find() method, which would only pass on new keys that were // not present in the hash table. // @@ -617,7 +617,7 @@ Status SwissTable::map_new_keys(uint32_t num_ids, uint16_t* ids, const uint32_t* ARROW_DCHECK(static_cast(num_ids) <= (1 << log_minibatch_)); ARROW_DCHECK(static_cast(max_id + 1) <= (1 << log_minibatch_)); - // Allocate temporary buffers for slot ids and intialize them + // Allocate temporary buffers for slot ids and initialize them auto slot_ids_buf = util::TempVectorHolder(temp_stack, max_id + 1); uint32_t* slot_ids = slot_ids_buf.mutable_data(); init_slot_ids_for_new_keys(num_ids, ids, hashes, slot_ids); diff --git a/cpp/src/arrow/compute/key_map.h b/cpp/src/arrow/compute/key_map.h index 85ef9029d6fc9..8e06dc83483aa 100644 --- a/cpp/src/arrow/compute/key_map.h +++ b/cpp/src/arrow/compute/key_map.h @@ -142,7 +142,7 @@ class ARROW_EXPORT SwissTable { void extract_group_ids_imp(const int num_keys, const uint16_t* selection, const uint32_t* hashes, const uint8_t* local_slots, uint32_t* out_group_ids, int elements_offset, - int element_mutltiplier) const; + int element_multiplier) const; inline uint64_t next_slot_to_visit(uint64_t block_index, int slot, int match_found) const; @@ -187,7 +187,7 @@ class ARROW_EXPORT SwissTable { // Slow processing of input keys in the most generic case. // Handles inserting new keys. - // Pre-existing keys will be handled correctly, although the intended use is for this + // Preexisting keys will be handled correctly, although the intended use is for this // call to follow a call to find() method, which would only pass on new keys that were // not present in the hash table. 
// diff --git a/cpp/src/arrow/compute/key_map_avx2.cc b/cpp/src/arrow/compute/key_map_avx2.cc index 731553511044f..3526a6cb0f344 100644 --- a/cpp/src/arrow/compute/key_map_avx2.cc +++ b/cpp/src/arrow/compute/key_map_avx2.cc @@ -117,7 +117,7 @@ int SwissTable::early_filter_imp_avx2_x8(const int num_hashes, const uint32_t* h vlocal_slot = _mm256_add_epi32(_mm256_and_si256(vlocal_slot, _mm256_set1_epi32(0xff)), _mm256_and_si256(vgt, _mm256_set1_epi32(4))); - // Convert slot id relative to the block to slot id relative to the beginnning of the + // Convert slot id relative to the block to slot id relative to the beginning of the // table // uint64_t local_slot = _mm256_extract_epi64( diff --git a/cpp/src/arrow/compute/light_array.cc b/cpp/src/arrow/compute/light_array.cc index 93a054de1957c..73ea01a03a8fa 100644 --- a/cpp/src/arrow/compute/light_array.cc +++ b/cpp/src/arrow/compute/light_array.cc @@ -89,7 +89,7 @@ KeyColumnArray KeyColumnArray::Slice(int64_t offset, int64_t length) const { sliced.bit_offset_[0] = (bit_offset_[0] + offset) % 8; if (metadata_.fixed_length == 0 && !metadata_.is_null_type) { - ARROW_DCHECK(is_bool_type()) << "Expected BOOL type type but got a different type."; + ARROW_DCHECK(is_bool_type()) << "Expected BOOL type but got a different type."; sliced.buffers_[1] = buffers_[1] ? buffers_[1] + (bit_offset_[1] + offset) / 8 : nullptr; sliced.mutable_buffers_[1] = mutable_buffers_[1] diff --git a/cpp/src/arrow/compute/light_array_test.cc b/cpp/src/arrow/compute/light_array_test.cc index 52121530fe91d..3ceba43604b28 100644 --- a/cpp/src/arrow/compute/light_array_test.cc +++ b/cpp/src/arrow/compute/light_array_test.cc @@ -333,7 +333,7 @@ TEST(ResizableArrayData, Binary) { ASSERT_EQ(0, array.num_rows()); ASSERT_OK(array.ResizeFixedLengthBuffers(2)); ASSERT_EQ(2, array.num_rows()); - // At this point the offets memory has been allocated and needs to be filled + // At this point the offsets memory has been allocated and needs to be filled // in before we allocate the variable length memory int offsets_width = static_cast(arrow::internal::checked_pointer_cast(type) diff --git a/cpp/src/arrow/compute/ordering.h b/cpp/src/arrow/compute/ordering.h index e581269cc20dd..61caa2b570dd3 100644 --- a/cpp/src/arrow/compute/ordering.h +++ b/cpp/src/arrow/compute/ordering.h @@ -52,7 +52,7 @@ class ARROW_EXPORT SortKey : public util::EqualityComparable { bool Equals(const SortKey& other) const; std::string ToString() const; - /// A FieldRef targetting the sort column. + /// A FieldRef targeting the sort column. FieldRef target; /// How to order by this sort key. 
SortOrder order; diff --git a/cpp/src/arrow/compute/registry_test.cc b/cpp/src/arrow/compute/registry_test.cc index 2d69f119df1f4..3dc14bcff83ee 100644 --- a/cpp/src/arrow/compute/registry_test.cc +++ b/cpp/src/arrow/compute/registry_test.cc @@ -69,7 +69,7 @@ TEST_P(TestRegistry, Basics) { ASSERT_OK_AND_ASSIGN(std::shared_ptr f1, registry_->GetFunction("f1")); ASSERT_EQ("f1", f1->name()); - // Non-existent function + // Nonexistent function ASSERT_RAISES(KeyError, registry_->GetFunction("f2")); // Try adding a function with name collision diff --git a/cpp/src/arrow/compute/row/grouper.cc b/cpp/src/arrow/compute/row/grouper.cc index b3d28ef19a1a0..5e23eda16fda2 100644 --- a/cpp/src/arrow/compute/row/grouper.cc +++ b/cpp/src/arrow/compute/row/grouper.cc @@ -210,7 +210,7 @@ struct SimpleKeySegmenter : public BaseRowSegmenter { private: TypeHolder key_type_; - std::vector save_key_data_; // previusly seen segment-key grouping data + std::vector save_key_data_; // previously seen segment-key grouping data bool extend_was_called_; }; diff --git a/cpp/src/arrow/compute/row/grouper.h b/cpp/src/arrow/compute/row/grouper.h index 15f00eaac2191..628a9c14f3e44 100644 --- a/cpp/src/arrow/compute/row/grouper.h +++ b/cpp/src/arrow/compute/row/grouper.h @@ -29,12 +29,12 @@ namespace arrow { namespace compute { /// \brief A segment -/// A segment group is a chunk of continous rows that have the same segment key. (For +/// A segment group is a chunk of continuous rows that have the same segment key. (For /// example, in ordered time series processing, segment key can be "date", and a segment /// group can be all the rows that belong to the same date.) A segment group can span -/// across multiple exec batches. A segment is a chunk of continous rows that has the same -/// segment key within a given batch. When a segment group span cross batches, it will -/// have multiple segments. A segment never spans cross batches. The segment data +/// across multiple exec batches. A segment is a chunk of continuous rows that has the +/// same segment key within a given batch. When a segment group span cross batches, it +/// will have multiple segments. A segment never spans cross batches. The segment data /// structure only makes sense when used along with a exec batch. struct ARROW_EXPORT Segment { /// \brief the offset into the batch where the segment starts @@ -92,7 +92,7 @@ class ARROW_EXPORT RowSegmenter { /// \brief Reset this segmenter /// /// A segmenter normally extends (see `Segment`) a segment from one batch to the next. - /// If segment-extenion is undesirable, for example when each batch is processed + /// If segment-extension is undesirable, for example when each batch is processed /// independently, then `Reset` should be invoked before processing the next batch. virtual Status Reset() = 0; From 83cba25017a5c3a03e47f1851f242fa284f93533 Mon Sep 17 00:00:00 2001 From: Yue Date: Fri, 5 Jan 2024 03:02:40 +0800 Subject: [PATCH 129/570] GH-37848: [C++][Gandiva] Migrate LLVM JIT engine from MCJIT to ORC v2/LLJIT (#39098) ### Rationale for this change Gandiva currently employs MCJIT as its internal JIT engine. However, LLVM has introduced a newer JIT API known as ORC v2/LLJIT since LLVM 7.0, and it has several advantage over MCJIT, in particular, MCJIT is not actively maintained, and is slated for eventual deprecation and removal. ### What changes are included in this PR? * This PR replaces the MCJIT JIT engine with the ORC v2 engine, using the `LLJIT` API. 
* This PR adds a new JIT linker option `JITLink` (https://llvm.org/docs/JITLink.html), which can be used together with `LLJIT`, for LLVM 14+ on Linux/macOS platform. It is turned off by default but could be turned on with environment variable `GANDIVA_USE_JIT_LINK` ### Are these changes tested? Yes, they are covered by existing unit tests ### Are there any user-facing changes? * `Configuration` class has a new option called `dump_ir`. If users would like to call `DumpIR` API of `Projector` and `Filter`, they have to set the `dump_ir` option first. * Closes: #37848 Authored-by: Yue Ni Signed-off-by: Sutou Kouhei --- cpp/cmake_modules/FindLLVMAlt.cmake | 2 +- cpp/src/gandiva/configuration.h | 17 +- cpp/src/gandiva/engine.cc | 357 ++++++++++++++-------- cpp/src/gandiva/engine.h | 46 ++- cpp/src/gandiva/engine_llvm_test.cc | 26 +- cpp/src/gandiva/filter.cc | 8 +- cpp/src/gandiva/filter.h | 2 +- cpp/src/gandiva/llvm_generator.cc | 23 +- cpp/src/gandiva/llvm_generator.h | 12 +- cpp/src/gandiva/llvm_generator_test.cc | 21 +- cpp/src/gandiva/projector.cc | 8 +- cpp/src/gandiva/projector.h | 2 +- cpp/src/gandiva/tests/micro_benchmarks.cc | 31 ++ cpp/src/gandiva/tests/test_util.cc | 4 + cpp/src/gandiva/tests/test_util.h | 2 + python/pyarrow/gandiva.pyx | 59 +++- python/pyarrow/includes/libgandiva.pxd | 14 +- python/pyarrow/tests/test_gandiva.py | 6 +- 18 files changed, 441 insertions(+), 199 deletions(-) diff --git a/cpp/cmake_modules/FindLLVMAlt.cmake b/cpp/cmake_modules/FindLLVMAlt.cmake index 69f680824b082..2730f829817f6 100644 --- a/cpp/cmake_modules/FindLLVMAlt.cmake +++ b/cpp/cmake_modules/FindLLVMAlt.cmake @@ -93,8 +93,8 @@ if(LLVM_FOUND) debuginfodwarf ipo linker - mcjit native + orcjit target) if(LLVM_VERSION_MAJOR GREATER_EQUAL 14) list(APPEND LLVM_TARGET_COMPONENTS passes) diff --git a/cpp/src/gandiva/configuration.h b/cpp/src/gandiva/configuration.h index f43a2b190731f..620c58537f963 100644 --- a/cpp/src/gandiva/configuration.h +++ b/cpp/src/gandiva/configuration.h @@ -37,10 +37,12 @@ class GANDIVA_EXPORT Configuration { explicit Configuration(bool optimize, std::shared_ptr function_registry = - gandiva::default_function_registry()) + gandiva::default_function_registry(), + bool dump_ir = false) : optimize_(optimize), target_host_cpu_(true), - function_registry_(function_registry) {} + function_registry_(std::move(function_registry)), + dump_ir_(dump_ir) {} Configuration() : Configuration(true) {} @@ -50,11 +52,13 @@ class GANDIVA_EXPORT Configuration { bool optimize() const { return optimize_; } bool target_host_cpu() const { return target_host_cpu_; } + bool dump_ir() const { return dump_ir_; } std::shared_ptr function_registry() const { return function_registry_; } void set_optimize(bool optimize) { optimize_ = optimize; } + void set_dump_ir(bool dump_ir) { dump_ir_ = dump_ir; } void target_host_cpu(bool target_host_cpu) { target_host_cpu_ = target_host_cpu; } void set_function_registry(std::shared_ptr function_registry) { function_registry_ = std::move(function_registry); @@ -65,6 +69,9 @@ class GANDIVA_EXPORT Configuration { bool target_host_cpu_; /* set the mcpu flag to host cpu while compiling llvm ir */ std::shared_ptr function_registry_; /* function registry that may contain external functions */ + // flag indicating if IR dumping is needed, defaults to false, and turning it on will + // negatively affect performance + bool dump_ir_ = false; }; /// \brief configuration builder for gandiva @@ -83,6 +90,12 @@ class GANDIVA_EXPORT ConfigurationBuilder { return configuration; } + 
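  // A minimal usage sketch (illustrative only; the schema and expression are
  // assumed, not part of this change). With IR dumping enabled, callers can
  // retrieve the generated module IR through the DumpIR API of Projector/Filter:
  //
  //   auto config = ConfigurationBuilder().build_with_ir_dumping(true);
  //   std::shared_ptr<Projector> projector;
  //   ARROW_RETURN_NOT_OK(Projector::Make(schema, {expr}, config, &projector));
  //   auto ir = projector->DumpIR();
  //
  // (Independently of IR dumping, the JITLink-based linker can be opted into
  // on LLVM 14+ via the GANDIVA_USE_JIT_LINK environment variable.)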
std::shared_ptr build_with_ir_dumping(bool dump_ir) { + std::shared_ptr configuration( + new Configuration(true, gandiva::default_function_registry(), dump_ir)); + return configuration; + } + std::shared_ptr build( std::shared_ptr function_registry) { std::shared_ptr configuration( diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index 1cea1fd2cbf30..fc047f2ac0763 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -31,7 +31,8 @@ #include #include -#include "arrow/util/logging.h" +#include +#include #if defined(_MSC_VER) #pragma warning(push) @@ -46,13 +47,14 @@ #include #include #include -#include +#include #include #include #include #include #include #include +#include #if LLVM_VERSION_MAJOR >= 17 #include #else @@ -86,6 +88,13 @@ #include #include +// JITLink is available in LLVM 9+ +// but the `InProcessMemoryManager::Create` API was added since LLVM 14 +#if LLVM_VERSION_MAJOR >= 14 && !defined(_WIN32) +#define JIT_LINK_SUPPORTED +#include +#endif + #if defined(_MSC_VER) #pragma warning(pop) #endif @@ -103,9 +112,136 @@ extern const size_t kPrecompiledBitcodeSize; std::once_flag llvm_init_once_flag; static bool llvm_init = false; static llvm::StringRef cpu_name; -static llvm::SmallVector cpu_attrs; +static std::vector cpu_attrs; std::once_flag register_exported_funcs_flag; +template +arrow::Result AsArrowResult(llvm::Expected& expected, + const std::string& error_context) { + if (!expected) { + return Status::CodeGenError(error_context, llvm::toString(expected.takeError())); + } + return std::move(expected.get()); +} + +Result MakeTargetMachineBuilder( + const Configuration& conf) { + llvm::orc::JITTargetMachineBuilder jtmb( + (llvm::Triple(llvm::sys::getDefaultTargetTriple()))); + if (conf.target_host_cpu()) { + jtmb.setCPU(cpu_name.str()); + jtmb.addFeatures(cpu_attrs); + } + auto const opt_level = + conf.optimize() ? 
llvm::CodeGenOpt::Aggressive : llvm::CodeGenOpt::None; + jtmb.setCodeGenOptLevel(opt_level); + return jtmb; +} + +std::string DumpModuleIR(const llvm::Module& module) { + std::string ir; + llvm::raw_string_ostream stream(ir); + module.print(stream, nullptr); + return ir; +} + +void AddAbsoluteSymbol(llvm::orc::LLJIT& lljit, const std::string& name, + void* function_ptr) { + llvm::orc::MangleAndInterner mangle(lljit.getExecutionSession(), lljit.getDataLayout()); + + // https://github.com/llvm/llvm-project/commit/8b1771bd9f304be39d4dcbdcccedb6d3bcd18200#diff-77984a824d9182e5c67a481740f3bc5da78d5bd4cf6e1716a083ddb30a4a4931 + // LLVM 17 introduced ExecutorSymbolDef and move most of ORC APIs to ExecutorAddr +#if LLVM_VERSION_MAJOR >= 17 + llvm::orc::ExecutorSymbolDef symbol( + llvm::orc::ExecutorAddr(reinterpret_cast(function_ptr)), + llvm::JITSymbolFlags::Exported); +#else + llvm::JITEvaluatedSymbol symbol(reinterpret_cast(function_ptr), + llvm::JITSymbolFlags::Exported); +#endif + + auto error = lljit.getMainJITDylib().define( + llvm::orc::absoluteSymbols({{mangle(name), symbol}})); + llvm::cantFail(std::move(error)); +} + +// add current process symbol to dylib +// LLVM >= 18 does this automatically +void AddProcessSymbol(llvm::orc::LLJIT& lljit) { + lljit.getMainJITDylib().addGenerator( + llvm::cantFail(llvm::orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( + lljit.getDataLayout().getGlobalPrefix()))); + // the `atexit` symbol cannot be found for ASAN +#ifdef ADDRESS_SANITIZER + if (!lljit.lookup("atexit")) { + AddAbsoluteSymbol(lljit, "atexit", reinterpret_cast(atexit)); + } +#endif +} + +#ifdef JIT_LINK_SUPPORTED +Result> CreateMemmoryManager() { + auto maybe_mem_manager = llvm::jitlink::InProcessMemoryManager::Create(); + return AsArrowResult(maybe_mem_manager, "Could not create memory manager: "); +} + +Status UseJITLinkIfEnabled(llvm::orc::LLJITBuilder& jit_builder) { + static auto maybe_use_jit_link = ::arrow::internal::GetEnvVar("GANDIVA_USE_JIT_LINK"); + if (maybe_use_jit_link.ok()) { + ARROW_ASSIGN_OR_RAISE(static auto memory_manager, CreateMemmoryManager()); + jit_builder.setObjectLinkingLayerCreator( + [&](llvm::orc::ExecutionSession& ES, const llvm::Triple& TT) { + return std::make_unique(ES, *memory_manager); + }); + } + return Status::OK(); +} +#endif + +Result> BuildJIT( + llvm::orc::JITTargetMachineBuilder jtmb, + std::optional>& object_cache) { + llvm::orc::LLJITBuilder jit_builder; + +#ifdef JIT_LINK_SUPPORTED + ARROW_RETURN_NOT_OK(UseJITLinkIfEnabled(jit_builder)); +#endif + + jit_builder.setJITTargetMachineBuilder(std::move(jtmb)); + if (object_cache.has_value()) { + jit_builder.setCompileFunctionCreator( + [&object_cache](llvm::orc::JITTargetMachineBuilder JTMB) + -> llvm::Expected> { + auto target_machine = JTMB.createTargetMachine(); + if (!target_machine) { + return target_machine.takeError(); + } + // after compilation, the object code will be stored into the given object + // cache + return std::make_unique( + std::move(*target_machine), &object_cache.value().get()); + }); + } + auto maybe_jit = jit_builder.create(); + ARROW_ASSIGN_OR_RAISE(auto jit, + AsArrowResult(maybe_jit, "Could not create LLJIT instance: ")); + + AddProcessSymbol(*jit); + return jit; +} + +Status Engine::SetLLVMObjectCache(GandivaObjectCache& object_cache) { + auto cached_buffer = object_cache.getObject(nullptr); + if (cached_buffer) { + auto error = lljit_->addObjectFile(std::move(cached_buffer)); + if (error) { + return Status::CodeGenError("Failed to add cached object file to 
LLJIT: ", + llvm::toString(std::move(error))); + } + } + return Status::OK(); +} + void Engine::InitOnce() { DCHECK_EQ(llvm_init, false); @@ -127,28 +263,34 @@ void Engine::InitOnce() { } } ARROW_LOG(INFO) << "Detected CPU Name : " << cpu_name.str(); - ARROW_LOG(INFO) << "Detected CPU Features:" << cpu_attrs_str; + ARROW_LOG(INFO) << "Detected CPU Features: [" << cpu_attrs_str << "]"; llvm_init = true; } Engine::Engine(const std::shared_ptr& conf, - std::unique_ptr ctx, - std::unique_ptr engine, llvm::Module* module, - bool cached) - : context_(std::move(ctx)), - execution_engine_(std::move(engine)), + std::unique_ptr lljit, + std::unique_ptr target_machine, bool cached) + : context_(std::make_unique()), + lljit_(std::move(lljit)), ir_builder_(std::make_unique>(*context_)), - module_(module), types_(*context_), optimize_(conf->optimize()), cached_(cached), - function_registry_(conf->function_registry()) {} + function_registry_(conf->function_registry()), + target_machine_(std::move(target_machine)), + conf_(conf) { + // LLVM 10 doesn't like the expr function name to be the same as the module name + auto module_id = "gdv_module_" + std::to_string(reinterpret_cast(this)); + module_ = std::make_unique(module_id, *context_); +} + +Engine::~Engine() {} Status Engine::Init() { std::call_once(register_exported_funcs_flag, gandiva::RegisterExportedFuncs); + // Add mappings for global functions that can be accessed from LLVM/IR module. ARROW_RETURN_NOT_OK(AddGlobalMappings()); - return Status::OK(); } @@ -163,101 +305,32 @@ Status Engine::LoadFunctionIRs() { } /// factory method to construct the engine. -Status Engine::Make(const std::shared_ptr& conf, bool cached, - std::unique_ptr* out) { +Result> Engine::Make( + const std::shared_ptr& conf, bool cached, + std::optional> object_cache) { std::call_once(llvm_init_once_flag, InitOnce); - auto ctx = std::make_unique(); - auto module = std::make_unique("codegen", *ctx); - - // Capture before moving, ExecutionEngine does not allow retrieving the - // original Module. - auto module_ptr = module.get(); - - auto opt_level = - conf->optimize() ? llvm::CodeGenOpt::Aggressive : llvm::CodeGenOpt::None; - - // Note that the lifetime of the error string is not captured by the - // ExecutionEngine but only for the lifetime of the builder. Found by - // inspecting LLVM sources. 
- std::string builder_error; - - llvm::EngineBuilder engine_builder(std::move(module)); - - engine_builder.setEngineKind(llvm::EngineKind::JIT) - .setOptLevel(opt_level) - .setErrorStr(&builder_error); - - if (conf->target_host_cpu()) { - engine_builder.setMCPU(cpu_name); - engine_builder.setMAttrs(cpu_attrs); - } - std::unique_ptr exec_engine{engine_builder.create()}; - - if (exec_engine == nullptr) { - return Status::CodeGenError("Could not instantiate llvm::ExecutionEngine: ", - builder_error); - } + ARROW_ASSIGN_OR_RAISE(auto jtmb, MakeTargetMachineBuilder(*conf)); + ARROW_ASSIGN_OR_RAISE(auto jit, BuildJIT(jtmb, object_cache)); + auto maybe_tm = jtmb.createTargetMachine(); + ARROW_ASSIGN_OR_RAISE(auto target_machine, + AsArrowResult(maybe_tm, "Could not create target machine: ")); std::unique_ptr engine{ - new Engine(conf, std::move(ctx), std::move(exec_engine), module_ptr, cached)}; - ARROW_RETURN_NOT_OK(engine->Init()); - *out = std::move(engine); - return Status::OK(); -} - -// This method was modified from its original version for a part of MLIR -// Original source from -// https://github.com/llvm/llvm-project/blob/9f2ce5b915a505a5488a5cf91bb0a8efa9ddfff7/mlir/lib/ExecutionEngine/ExecutionEngine.cpp -// The original copyright notice follows. - -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -static void SetDataLayout(llvm::Module* module) { - auto target_triple = llvm::sys::getDefaultTargetTriple(); - std::string error_message; - auto target = llvm::TargetRegistry::lookupTarget(target_triple, error_message); - if (!target) { - return; - } - - std::string cpu(llvm::sys::getHostCPUName()); - llvm::SubtargetFeatures features; - llvm::StringMap host_features; - - if (llvm::sys::getHostCPUFeatures(host_features)) { - for (auto& f : host_features) { - features.AddFeature(f.first(), f.second); - } - } + new Engine(conf, std::move(jit), std::move(target_machine), cached)}; - std::unique_ptr machine( - target->createTargetMachine(target_triple, cpu, features.getString(), {}, {})); - - module->setDataLayout(machine->createDataLayout()); -} -// end of the modified method from MLIR - -template -static arrow::Result AsArrowResult(llvm::Expected& expected) { - if (!expected) { - std::string str; - llvm::raw_string_ostream stream(str); - stream << expected.takeError(); - return Status::CodeGenError(stream.str()); - } - return std::move(expected.get()); + ARROW_RETURN_NOT_OK(engine->Init()); + return engine; } static arrow::Status VerifyAndLinkModule( - llvm::Module* dest_module, + llvm::Module& dest_module, llvm::Expected> src_module_or_error) { - ARROW_ASSIGN_OR_RAISE(auto src_ir_module, AsArrowResult(src_module_or_error)); + ARROW_ASSIGN_OR_RAISE( + auto src_ir_module, + AsArrowResult(src_module_or_error, "Failed to verify and link module: ")); - // set dataLayout - SetDataLayout(src_ir_module.get()); + src_ir_module->setDataLayout(dest_module.getDataLayout()); std::string error_info; llvm::raw_string_ostream error_stream(error_info); @@ -265,16 +338,21 @@ static arrow::Status VerifyAndLinkModule( llvm::verifyModule(*src_ir_module, &error_stream), Status::CodeGenError("verify of IR Module failed: " + error_stream.str())); - ARROW_RETURN_IF(llvm::Linker::linkModules(*dest_module, std::move(src_ir_module)), + ARROW_RETURN_IF(llvm::Linker::linkModules(dest_module, std::move(src_ir_module)), Status::CodeGenError("failed to link IR Modules")); 
return Status::OK(); } +llvm::Module* Engine::module() { + DCHECK(!module_finalized_) << "module cannot be accessed after finalized"; + return module_.get(); +} + // Handling for pre-compiled IR libraries. Status Engine::LoadPreCompiledIR() { - auto bitcode = llvm::StringRef(reinterpret_cast(kPrecompiledBitcode), - kPrecompiledBitcodeSize); + auto const bitcode = llvm::StringRef(reinterpret_cast(kPrecompiledBitcode), + kPrecompiledBitcodeSize); /// Read from file into memory buffer. llvm::ErrorOr> buffer_or_error = @@ -291,14 +369,14 @@ Status Engine::LoadPreCompiledIR() { llvm::getOwningLazyBitcodeModule(std::move(buffer), *context()); // NOTE: llvm::handleAllErrors() fails linking with RTTI-disabled LLVM builds // (ARROW-5148) - ARROW_RETURN_NOT_OK(VerifyAndLinkModule(module_, std::move(module_or_error))); + ARROW_RETURN_NOT_OK(VerifyAndLinkModule(*module_, std::move(module_or_error))); return Status::OK(); } static llvm::MemoryBufferRef AsLLVMMemoryBuffer(const arrow::Buffer& arrow_buffer) { - auto data = reinterpret_cast(arrow_buffer.data()); - auto size = arrow_buffer.size(); - return llvm::MemoryBufferRef(llvm::StringRef(data, size), "external_bitcode"); + auto const data = reinterpret_cast(arrow_buffer.data()); + auto const size = arrow_buffer.size(); + return {llvm::StringRef(data, size), "external_bitcode"}; } Status Engine::LoadExternalPreCompiledIR() { @@ -306,7 +384,7 @@ Status Engine::LoadExternalPreCompiledIR() { for (auto const& buffer : buffers) { auto llvm_memory_buffer_ref = AsLLVMMemoryBuffer(*buffer); auto module_or_error = llvm::parseBitcodeFile(llvm_memory_buffer_ref, *context()); - ARROW_RETURN_NOT_OK(VerifyAndLinkModule(module_, std::move(module_or_error))); + ARROW_RETURN_NOT_OK(VerifyAndLinkModule(*module_, std::move(module_or_error))); } return Status::OK(); @@ -386,7 +464,8 @@ static void OptimizeModuleWithLegacyPassManager(llvm::Module& module, std::unique_ptr pass_manager( new llvm::legacy::PassManager()); - pass_manager->add(llvm::createTargetTransformInfoWrapperPass(target_analysis)); + pass_manager->add( + llvm::createTargetTransformInfoWrapperPass(std::move(target_analysis))); pass_manager->add(llvm::createFunctionInliningPass()); pass_manager->add(llvm::createInstructionCombiningPass()); pass_manager->add(llvm::createPromoteMemoryToRegisterPass()); @@ -411,40 +490,64 @@ Status Engine::FinalizeModule() { ARROW_RETURN_NOT_OK(RemoveUnusedFunctions()); if (optimize_) { - auto target_analysis = execution_engine_->getTargetMachine()->getTargetIRAnalysis(); - + auto target_analysis = target_machine_->getTargetIRAnalysis(); // misc passes to allow for inlining, vectorization, .. 
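    // LLVM 14 and newer take the new pass-manager pipeline below; older LLVM
    // releases fall back to the legacy pass manager.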
#if LLVM_VERSION_MAJOR >= 14 - OptimizeModuleWithNewPassManager(*module_, target_analysis); + OptimizeModuleWithNewPassManager(*module_, std::move(target_analysis)); #else - OptimizeModuleWithLegacyPassManager(*module_, target_analysis); + OptimizeModuleWithLegacyPassManager(*module_, std::move(target_analysis)); #endif } ARROW_RETURN_IF(llvm::verifyModule(*module_, &llvm::errs()), Status::CodeGenError("Module verification failed after optimizer")); - } - // do the compilation - execution_engine_->finalizeObject(); + // print the module IR and save it for later use if IR dumping is needed + // since the module will be moved to construct LLJIT instance, and it is not + // available after LLJIT instance is constructed + if (conf_->dump_ir()) { + module_ir_ = DumpModuleIR(*module_); + } + + llvm::orc::ThreadSafeModule tsm(std::move(module_), std::move(context_)); + auto error = lljit_->addIRModule(std::move(tsm)); + if (error) { + return Status::CodeGenError("Failed to add IR module to LLJIT: ", + llvm::toString(std::move(error))); + } + } module_finalized_ = true; return Status::OK(); } -void* Engine::CompiledFunction(std::string& function) { - DCHECK(module_finalized_); - return reinterpret_cast(execution_engine_->getFunctionAddress(function)); +Result Engine::CompiledFunction(const std::string& function) { + DCHECK(module_finalized_) + << "module must be finalized before getting compiled function"; + auto sym = lljit_->lookup(function); + if (!sym) { + return Status::CodeGenError("Failed to look up function: " + function + + " error: " + llvm::toString(sym.takeError())); + } + // Since LLVM 15, `LLJIT::lookup` returns ExecutorAddrs rather than + // JITEvaluatedSymbols +#if LLVM_VERSION_MAJOR >= 15 + auto fn_addr = sym->getValue(); +#else + auto fn_addr = sym->getAddress(); +#endif + auto fn_ptr = reinterpret_cast(fn_addr); + if (fn_ptr == nullptr) { + return Status::CodeGenError("Failed to get address for function: " + function); + } + return fn_ptr; } void Engine::AddGlobalMappingForFunc(const std::string& name, llvm::Type* ret_type, - const std::vector& args, - void* function_ptr) { - constexpr bool is_var_arg = false; - auto prototype = llvm::FunctionType::get(ret_type, args, is_var_arg); - constexpr auto linkage = llvm::GlobalValue::ExternalLinkage; - auto fn = llvm::Function::Create(prototype, linkage, name, module()); - execution_engine_->addGlobalMapping(fn, function_ptr); + const std::vector& args, void* func) { + auto const prototype = llvm::FunctionType::get(ret_type, args, /*is_var_arg*/ false); + llvm::Function::Create(prototype, llvm::GlobalValue::ExternalLinkage, name, module()); + AddAbsoluteSymbol(*lljit_, name, func); } arrow::Status Engine::AddGlobalMappings() { @@ -453,11 +556,9 @@ arrow::Status Engine::AddGlobalMappings() { return c_funcs.AddMappings(this); } -std::string Engine::DumpIR() { - std::string ir; - llvm::raw_string_ostream stream(ir); - module_->print(stream, nullptr); - return ir; +const std::string& Engine::ir() { + DCHECK(!module_ir_.empty()) << "dump_ir in Configuration must be set for dumping IR"; + return module_ir_; } } // namespace gandiva diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index df2d8b36d9260..565c3f142502d 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -17,11 +17,16 @@ #pragma once +#include +#include #include +#include #include #include #include +#include + #include "arrow/util/logging.h" #include "arrow/util/macros.h" #include "gandiva/configuration.h" @@ -30,23 +35,34 @@ #include 
"gandiva/llvm_types.h" #include "gandiva/visibility.h" +namespace llvm::orc { +class LLJIT; +} // namespace llvm::orc + namespace gandiva { /// \brief LLVM Execution engine wrapper. class GANDIVA_EXPORT Engine { public: + ~Engine(); llvm::LLVMContext* context() { return context_.get(); } llvm::IRBuilder<>* ir_builder() { return ir_builder_.get(); } LLVMTypes* types() { return &types_; } - llvm::Module* module() { return module_; } + + /// Retrieve LLVM module in the engine. + /// This should only be called before `FinalizeModule` is called + llvm::Module* module(); /// Factory method to create and initialize the engine object. /// /// \param[in] config the engine configuration /// \param[in] cached flag to mark if the module is already compiled and cached - /// \param[out] engine the created engine - static Status Make(const std::shared_ptr& config, bool cached, - std::unique_ptr* engine); + /// \param[in] object_cache an optional object_cache used for building the module + /// \return arrow::Result containing the created engine + static Result> Make( + const std::shared_ptr& config, bool cached, + std::optional> object_cache = + std::nullopt); /// Add the function to the list of IR functions that need to be compiled. /// Compiling only the functions that are used by the module saves time. @@ -59,36 +75,31 @@ class GANDIVA_EXPORT Engine { Status FinalizeModule(); /// Set LLVM ObjectCache. - void SetLLVMObjectCache(GandivaObjectCache& object_cache) { - execution_engine_->setObjectCache(&object_cache); - } + Status SetLLVMObjectCache(GandivaObjectCache& object_cache); /// Get the compiled function corresponding to the irfunction. - void* CompiledFunction(std::string& function); + Result CompiledFunction(const std::string& function); // Create and add a mapping for the cpp function to make it accessible from LLVM. void AddGlobalMappingForFunc(const std::string& name, llvm::Type* ret_type, const std::vector& args, void* func); /// Return the generated IR for the module. - std::string DumpIR(); + const std::string& ir(); /// Load the function IRs that can be accessed in the module. Status LoadFunctionIRs(); private: Engine(const std::shared_ptr& conf, - std::unique_ptr ctx, - std::unique_ptr engine, llvm::Module* module, - bool cached); + std::unique_ptr lljit, + std::unique_ptr target_machine, bool cached); // Post construction init. This _must_ be called after the constructor. Status Init(); static void InitOnce(); - llvm::ExecutionEngine& execution_engine() { return *execution_engine_; } - /// load pre-compiled IR modules from precompiled_bitcode.cc and merge them into /// the main module. 
Status LoadPreCompiledIR(); @@ -103,9 +114,9 @@ class GANDIVA_EXPORT Engine { Status RemoveUnusedFunctions(); std::unique_ptr context_; - std::unique_ptr execution_engine_; + std::unique_ptr lljit_; std::unique_ptr> ir_builder_; - llvm::Module* module_; + std::unique_ptr module_; LLVMTypes types_; std::vector functions_to_compile_; @@ -115,6 +126,9 @@ class GANDIVA_EXPORT Engine { bool cached_; bool functions_loaded_ = false; std::shared_ptr function_registry_; + std::string module_ir_; + std::unique_ptr target_machine_; + const std::shared_ptr conf_; }; } // namespace gandiva diff --git a/cpp/src/gandiva/engine_llvm_test.cc b/cpp/src/gandiva/engine_llvm_test.cc index 9baaa82d2e0d3..78f468d13fa1f 100644 --- a/cpp/src/gandiva/engine_llvm_test.cc +++ b/cpp/src/gandiva/engine_llvm_test.cc @@ -24,14 +24,14 @@ namespace gandiva { -typedef int64_t (*add_vector_func_t)(int64_t* data, int n); +using add_vector_func_t = int64_t (*)(int64_t*, int); class TestEngine : public ::testing::Test { protected: - std::string BuildVecAdd(Engine* engine) { - auto types = engine->types(); - llvm::IRBuilder<>* builder = engine->ir_builder(); - llvm::LLVMContext* context = engine->context(); + std::string BuildVecAdd(Engine* gdv_engine) { + auto types = gdv_engine->types(); + llvm::IRBuilder<>* builder = gdv_engine->ir_builder(); + llvm::LLVMContext* context = gdv_engine->context(); // Create fn prototype : // int64_t add_longs(int64_t *elements, int32_t nelements) @@ -42,10 +42,10 @@ class TestEngine : public ::testing::Test { llvm::FunctionType::get(types->i64_type(), arguments, false /*isVarArg*/); // Create fn - std::string func_name = "add_longs"; - engine->AddFunctionToCompile(func_name); + std::string func_name = "add_longs_test_expr"; + gdv_engine->AddFunctionToCompile(func_name); llvm::Function* fn = llvm::Function::Create( - prototype, llvm::GlobalValue::ExternalLinkage, func_name, engine->module()); + prototype, llvm::GlobalValue::ExternalLinkage, func_name, gdv_engine->module()); assert(fn != nullptr); // Name the arguments @@ -99,7 +99,9 @@ class TestEngine : public ::testing::Test { return func_name; } - void BuildEngine() { ASSERT_OK(Engine::Make(TestConfiguration(), false, &engine)); } + void BuildEngine() { + ASSERT_OK_AND_ASSIGN(engine, Engine::Make(TestConfiguration(), false)); + } std::unique_ptr engine; std::shared_ptr configuration = TestConfiguration(); @@ -111,7 +113,8 @@ TEST_F(TestEngine, TestAddUnoptimised) { std::string fn_name = BuildVecAdd(engine.get()); ASSERT_OK(engine->FinalizeModule()); - auto add_func = reinterpret_cast(engine->CompiledFunction(fn_name)); + ASSERT_OK_AND_ASSIGN(auto fn_ptr, engine->CompiledFunction(fn_name)); + auto add_func = reinterpret_cast(fn_ptr); int64_t my_array[] = {1, 3, -5, 8, 10}; EXPECT_EQ(add_func(my_array, 5), 17); @@ -123,7 +126,8 @@ TEST_F(TestEngine, TestAddOptimised) { std::string fn_name = BuildVecAdd(engine.get()); ASSERT_OK(engine->FinalizeModule()); - auto add_func = reinterpret_cast(engine->CompiledFunction(fn_name)); + EXPECT_OK_AND_ASSIGN(auto fn_ptr, engine->CompiledFunction(fn_name)); + auto add_func = reinterpret_cast(fn_ptr); int64_t my_array[] = {1, 3, -5, 8, 10}; EXPECT_EQ(add_func(my_array, 5), 17); diff --git a/cpp/src/gandiva/filter.cc b/cpp/src/gandiva/filter.cc index 416d97b5dbd1d..8a270cfdc06f2 100644 --- a/cpp/src/gandiva/filter.cc +++ b/cpp/src/gandiva/filter.cc @@ -65,8 +65,8 @@ Status Filter::Make(SchemaPtr schema, ConditionPtr condition, GandivaObjectCache obj_cache(cache, cache_key); // Build LLVM generator, and 
generate code for the specified expression - std::unique_ptr llvm_gen; - ARROW_RETURN_NOT_OK(LLVMGenerator::Make(configuration, is_cached, &llvm_gen)); + ARROW_ASSIGN_OR_RAISE(auto llvm_gen, + LLVMGenerator::Make(configuration, is_cached, obj_cache)); if (!is_cached) { // Run the validation on the expression. @@ -77,7 +77,7 @@ Status Filter::Make(SchemaPtr schema, ConditionPtr condition, } // Set the object cache for LLVM - llvm_gen->SetLLVMObjectCache(obj_cache); + ARROW_RETURN_NOT_OK(llvm_gen->SetLLVMObjectCache(obj_cache)); ARROW_RETURN_NOT_OK(llvm_gen->Build({condition}, SelectionVector::Mode::MODE_NONE)); @@ -119,7 +119,7 @@ Status Filter::Evaluate(const arrow::RecordBatch& batch, return out_selection->PopulateFromBitMap(result, bitmap_size, num_rows - 1); } -std::string Filter::DumpIR() { return llvm_generator_->DumpIR(); } +const std::string& Filter::DumpIR() { return llvm_generator_->ir(); } void Filter::SetBuiltFromCache(bool flag) { built_from_cache_ = flag; } diff --git a/cpp/src/gandiva/filter.h b/cpp/src/gandiva/filter.h index cc536bca1bb3d..b4043d93c857a 100644 --- a/cpp/src/gandiva/filter.h +++ b/cpp/src/gandiva/filter.h @@ -76,7 +76,7 @@ class GANDIVA_EXPORT Filter { Status Evaluate(const arrow::RecordBatch& batch, std::shared_ptr out_selection); - std::string DumpIR(); + const std::string& DumpIR(); void SetBuiltFromCache(bool flag); diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc index 41cbe0ffe3a3a..62ebab08f4d6b 100644 --- a/cpp/src/gandiva/llvm_generator.cc +++ b/cpp/src/gandiva/llvm_generator.cc @@ -42,15 +42,15 @@ LLVMGenerator::LLVMGenerator(bool cached, function_registry_(std::move(function_registry)), enable_ir_traces_(false) {} -Status LLVMGenerator::Make(const std::shared_ptr& config, bool cached, - std::unique_ptr* llvm_generator) { - std::unique_ptr llvmgen_obj( +Result> LLVMGenerator::Make( + const std::shared_ptr& config, bool cached, + std::optional> object_cache) { + std::unique_ptr llvm_generator( new LLVMGenerator(cached, config->function_registry())); - ARROW_RETURN_NOT_OK(Engine::Make(config, cached, &(llvmgen_obj->engine_))); - *llvm_generator = std::move(llvmgen_obj); - - return Status::OK(); + ARROW_ASSIGN_OR_RAISE(llvm_generator->engine_, + Engine::Make(config, cached, object_cache)); + return llvm_generator; } std::shared_ptr>> @@ -62,8 +62,8 @@ LLVMGenerator::GetCache() { return shared_cache; } -void LLVMGenerator::SetLLVMObjectCache(GandivaObjectCache& object_cache) { - engine_->SetLLVMObjectCache(object_cache); +Status LLVMGenerator::SetLLVMObjectCache(GandivaObjectCache& object_cache) { + return engine_->SetLLVMObjectCache(object_cache); } Status LLVMGenerator::Add(const ExpressionPtr expr, const FieldDescriptorPtr output) { @@ -73,7 +73,7 @@ Status LLVMGenerator::Add(const ExpressionPtr expr, const FieldDescriptorPtr out ValueValidityPairPtr value_validity; ARROW_RETURN_NOT_OK(decomposer.Decompose(*expr->root(), &value_validity)); // Generate the IR function for the decomposed expression. - std::unique_ptr compiled_expr(new CompiledExpr(value_validity, output)); + auto compiled_expr = std::make_unique(value_validity, output); std::string fn_name = "expr_" + std::to_string(idx) + "_" + std::to_string(static_cast(selection_vector_mode_)); if (!cached_) { @@ -103,7 +103,8 @@ Status LLVMGenerator::Build(const ExpressionVector& exprs, SelectionVector::Mode // setup the jit functions for each expression. 
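  // Note: with the LLJIT-based Engine, CompiledFunction() now returns
  // arrow::Result<void*> resolved through LLJIT::lookup() rather than a raw
  // address from getFunctionAddress(), so the lookup result is unwrapped with
  // ARROW_ASSIGN_OR_RAISE before being cast to the JIT function pointer type.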
for (auto& compiled_expr : compiled_exprs_) { auto fn_name = compiled_expr->GetFunctionName(mode); - auto jit_fn = reinterpret_cast(engine_->CompiledFunction(fn_name)); + ARROW_ASSIGN_OR_RAISE(auto fn_ptr, engine_->CompiledFunction(fn_name)); + auto jit_fn = reinterpret_cast(fn_ptr); compiled_expr->SetJITFunction(selection_vector_mode_, jit_fn); } diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h index 250ab78fbfe28..0c532998e8b83 100644 --- a/cpp/src/gandiva/llvm_generator.h +++ b/cpp/src/gandiva/llvm_generator.h @@ -18,7 +18,9 @@ #pragma once #include +#include #include +#include #include #include @@ -47,15 +49,17 @@ class FunctionHolder; class GANDIVA_EXPORT LLVMGenerator { public: /// \brief Factory method to initialize the generator. - static Status Make(const std::shared_ptr& config, bool cached, - std::unique_ptr* llvm_generator); + static Result> Make( + const std::shared_ptr& config, bool cached, + std::optional> object_cache = + std::nullopt); /// \brief Get the cache to be used for LLVM ObjectCache. static std::shared_ptr>> GetCache(); /// \brief Set LLVM ObjectCache. - void SetLLVMObjectCache(GandivaObjectCache& object_cache); + Status SetLLVMObjectCache(GandivaObjectCache& object_cache); /// \brief Build the code for the expression trees for default mode with a LLVM /// ObjectCache. Each element in the vector represents an expression tree @@ -79,7 +83,7 @@ class GANDIVA_EXPORT LLVMGenerator { SelectionVector::Mode selection_vector_mode() { return selection_vector_mode_; } LLVMTypes* types() { return engine_->types(); } llvm::Module* module() { return engine_->module(); } - std::string DumpIR() { return engine_->DumpIR(); } + const std::string& ir() { return engine_->ir(); } private: explicit LLVMGenerator(bool cached, diff --git a/cpp/src/gandiva/llvm_generator_test.cc b/cpp/src/gandiva/llvm_generator_test.cc index 853d8ae6c3b8d..79654e7b78c7e 100644 --- a/cpp/src/gandiva/llvm_generator_test.cc +++ b/cpp/src/gandiva/llvm_generator_test.cc @@ -47,8 +47,7 @@ class TestLLVMGenerator : public ::testing::Test { auto external_registry = std::make_shared(); auto config = config_factory(std::move(external_registry)); - std::unique_ptr generator; - ASSERT_OK(LLVMGenerator::Make(config, false, &generator)); + ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(config, false)); auto module = generator->module(); ASSERT_OK(generator->engine_->LoadFunctionIRs()); @@ -58,8 +57,7 @@ class TestLLVMGenerator : public ::testing::Test { // Verify that a valid pc function exists for every function in the registry. 
TEST_F(TestLLVMGenerator, VerifyPCFunctions) { - std::unique_ptr generator; - ASSERT_OK(LLVMGenerator::Make(TestConfiguration(), false, &generator)); + ASSERT_OK_AND_ASSIGN(auto generator, LLVMGenerator::Make(TestConfiguration(), false)); llvm::Module* module = generator->module(); ASSERT_OK(generator->engine_->LoadFunctionIRs()); @@ -70,8 +68,8 @@ TEST_F(TestLLVMGenerator, VerifyPCFunctions) { TEST_F(TestLLVMGenerator, TestAdd) { // Setup LLVM generator to do an arithmetic add of two vectors - std::unique_ptr generator; - ASSERT_OK(LLVMGenerator::Make(TestConfiguration(), false, &generator)); + ASSERT_OK_AND_ASSIGN(auto generator, + LLVMGenerator::Make(TestConfigWithIrDumping(), false)); Annotator annotator; auto field0 = std::make_shared("f0", arrow::int32()); @@ -100,18 +98,22 @@ TEST_F(TestLLVMGenerator, TestAdd) { auto field_sum = std::make_shared("out", arrow::int32()); auto desc_sum = annotator.CheckAndAddInputFieldDescriptor(field_sum); - std::string fn_name = "codegen"; + // LLVM 10 doesn't like the expr function name to be the same as the module name when + // LLJIT is used + std::string fn_name = "llvm_gen_test_add_expr"; ASSERT_OK(generator->engine_->LoadFunctionIRs()); ASSERT_OK(generator->CodeGenExprValue(func_dex, 4, desc_sum, 0, fn_name, SelectionVector::MODE_NONE)); ASSERT_OK(generator->engine_->FinalizeModule()); - auto ir = generator->engine_->DumpIR(); + auto const& ir = generator->engine_->ir(); EXPECT_THAT(ir, testing::HasSubstr("vector.body")); - EvalFunc eval_func = (EvalFunc)generator->engine_->CompiledFunction(fn_name); + ASSERT_OK_AND_ASSIGN(auto fn_ptr, generator->engine_->CompiledFunction(fn_name)); + ASSERT_TRUE(fn_ptr); + auto eval_func = reinterpret_cast(fn_ptr); constexpr size_t kNumRecords = 4; std::array a0{1, 2, 3, 4}; std::array a1{5, 6, 7, 8}; @@ -126,6 +128,7 @@ TEST_F(TestLLVMGenerator, TestAdd) { reinterpret_cast(out.data()), reinterpret_cast(&out_bitmap), }; std::array addr_offsets{0, 0, 0, 0, 0, 0}; + eval_func(addrs.data(), addr_offsets.data(), nullptr, nullptr, nullptr, 0 /* dummy context ptr */, kNumRecords); diff --git a/cpp/src/gandiva/projector.cc b/cpp/src/gandiva/projector.cc index e717e825dfc71..ec0302146fff5 100644 --- a/cpp/src/gandiva/projector.cc +++ b/cpp/src/gandiva/projector.cc @@ -80,8 +80,8 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs, GandivaObjectCache obj_cache(cache, cache_key); // Build LLVM generator, and generate code for the specified expressions - std::unique_ptr llvm_gen; - ARROW_RETURN_NOT_OK(LLVMGenerator::Make(configuration, is_cached, &llvm_gen)); + ARROW_ASSIGN_OR_RAISE(auto llvm_gen, + LLVMGenerator::Make(configuration, is_cached, obj_cache)); // Run the validation on the expressions. 
// Return if any of the expression is invalid since @@ -95,7 +95,7 @@ Status Projector::Make(SchemaPtr schema, const ExpressionVector& exprs, } // Set the object cache for LLVM - llvm_gen->SetLLVMObjectCache(obj_cache); + ARROW_RETURN_NOT_OK(llvm_gen->SetLLVMObjectCache(obj_cache)); ARROW_RETURN_NOT_OK(llvm_gen->Build(exprs, selection_vector_mode)); @@ -281,7 +281,7 @@ Status Projector::ValidateArrayDataCapacity(const arrow::ArrayData& array_data, return Status::OK(); } -std::string Projector::DumpIR() { return llvm_generator_->DumpIR(); } +const std::string& Projector::DumpIR() { return llvm_generator_->ir(); } void Projector::SetBuiltFromCache(bool flag) { built_from_cache_ = flag; } diff --git a/cpp/src/gandiva/projector.h b/cpp/src/gandiva/projector.h index 6801a7c9f3f3c..f1ae7e4dc8ccd 100644 --- a/cpp/src/gandiva/projector.h +++ b/cpp/src/gandiva/projector.h @@ -118,7 +118,7 @@ class GANDIVA_EXPORT Projector { const SelectionVector* selection_vector, const ArrayDataVector& output) const; - std::string DumpIR(); + const std::string& DumpIR(); void SetBuiltFromCache(bool flag); diff --git a/cpp/src/gandiva/tests/micro_benchmarks.cc b/cpp/src/gandiva/tests/micro_benchmarks.cc index f126b769b2010..450e691323cae 100644 --- a/cpp/src/gandiva/tests/micro_benchmarks.cc +++ b/cpp/src/gandiva/tests/micro_benchmarks.cc @@ -16,6 +16,7 @@ // under the License. #include + #include "arrow/memory_pool.h" #include "arrow/status.h" #include "arrow/testing/gtest_util.h" @@ -420,6 +421,35 @@ static void DoDecimalAdd2(benchmark::State& state, int32_t precision, int32_t sc ASSERT_OK(status); } +static void TimedTestExprCompilation(benchmark::State& state) { + int64_t iteration = 0; + for (auto _ : state) { + // schema for input fields + auto field0 = field("f0", int64()); + auto field1 = field("f1", int64()); + auto literal = TreeExprBuilder::MakeLiteral(iteration); + auto schema = arrow::schema({field0, field1}); + + // output field + auto field_add = field("c1", int64()); + auto field_less_than = field("c2", boolean()); + + // Build expression + auto add_func = TreeExprBuilder::MakeFunction( + "add", {TreeExprBuilder::MakeField(field0), literal}, int64()); + auto less_than_func = TreeExprBuilder::MakeFunction( + "less_than", {TreeExprBuilder::MakeField(field1), literal}, boolean()); + + auto expr_0 = TreeExprBuilder::MakeExpression(add_func, field_add); + auto expr_1 = TreeExprBuilder::MakeExpression(less_than_func, field_less_than); + + std::shared_ptr projector; + ASSERT_OK(Projector::Make(schema, {expr_0, expr_1}, TestConfiguration(), &projector)); + + ++iteration; + } +} + static void DecimalAdd2Fast(benchmark::State& state) { // use lesser precision to test the fast-path DoDecimalAdd2(state, DecimalTypeUtil::kMaxPrecision - 6, 18); @@ -460,6 +490,7 @@ static void DecimalAdd3Large(benchmark::State& state) { DoDecimalAdd3(state, DecimalTypeUtil::kMaxPrecision, 18, true); } +BENCHMARK(TimedTestExprCompilation)->Unit(benchmark::kMicrosecond); BENCHMARK(TimedTestAdd3)->Unit(benchmark::kMicrosecond); BENCHMARK(TimedTestBigNested)->Unit(benchmark::kMicrosecond); BENCHMARK(TimedTestExtractYear)->Unit(benchmark::kMicrosecond); diff --git a/cpp/src/gandiva/tests/test_util.cc b/cpp/src/gandiva/tests/test_util.cc index 959ea3cd7a446..2ee49ffae0ed6 100644 --- a/cpp/src/gandiva/tests/test_util.cc +++ b/cpp/src/gandiva/tests/test_util.cc @@ -30,6 +30,10 @@ std::shared_ptr TestConfiguration() { return ConfigurationBuilder::DefaultConfiguration(); } +std::shared_ptr TestConfigWithIrDumping() { + return 
ConfigurationBuilder().build_with_ir_dumping(true); +} + #ifndef GANDIVA_EXTENSION_TEST_DIR #define GANDIVA_EXTENSION_TEST_DIR "." #endif diff --git a/cpp/src/gandiva/tests/test_util.h b/cpp/src/gandiva/tests/test_util.h index 69d63732aeeaa..d8181fe67516c 100644 --- a/cpp/src/gandiva/tests/test_util.h +++ b/cpp/src/gandiva/tests/test_util.h @@ -98,6 +98,8 @@ static inline ArrayPtr MakeArrowTypeArray(const std::shared_ptr std::shared_ptr TestConfiguration(); +std::shared_ptr TestConfigWithIrDumping(); + // helper function to create a Configuration with an external function registered to the // given function registry std::shared_ptr TestConfigWithFunctionRegistry( diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx index 35bbf5018f08a..2202ec64f2962 100644 --- a/python/pyarrow/gandiva.pyx +++ b/python/pyarrow/gandiva.pyx @@ -36,6 +36,7 @@ from pyarrow.includes.libgandiva cimport ( CNode, CProjector, CFilter, CSelectionVector, _ensure_selection_mode, + CConfiguration, CConfigurationBuilder, TreeExprBuilder_MakeExpression, TreeExprBuilder_MakeFunction, @@ -583,9 +584,47 @@ cdef class TreeExprBuilder(_Weakrefable): condition.node) return Condition.create(r) +cdef class Configuration(_Weakrefable): + cdef: + shared_ptr[CConfiguration] configuration + + def __cinit__(self, bint optimize=True, bint dump_ir=False): + """ + Initialize the configuration with specified options. + + Parameters + ---------- + optimize : bool, default True + Whether to enable optimizations. + dump_ir : bool, default False + Whether to dump LLVM IR. + """ + self.configuration = CConfigurationBuilder().build() + self.configuration.get().set_optimize(optimize) + self.configuration.get().set_dump_ir(dump_ir) + + @staticmethod + cdef create(shared_ptr[CConfiguration] configuration): + """ + Create a Configuration instance from an existing CConfiguration pointer. + + Parameters + ---------- + configuration : shared_ptr[CConfiguration] + Existing CConfiguration pointer. + + Returns + ------- + Configuration instance + """ + cdef Configuration self = Configuration.__new__(Configuration) + self.configuration = configuration + return self + cpdef make_projector(Schema schema, children, MemoryPool pool, - str selection_mode="NONE"): + str selection_mode="NONE", + Configuration configuration=None): """ Construct a projection using expressions. @@ -602,6 +641,8 @@ cpdef make_projector(Schema schema, children, MemoryPool pool, Memory pool used to allocate output arrays. selection_mode : str, default "NONE" Possible values are NONE, UINT16, UINT32, UINT64. + configuration : pyarrow.gandiva.Configuration, default None + Configuration for the projector. Returns ------- @@ -612,6 +653,9 @@ cpdef make_projector(Schema schema, children, MemoryPool pool, c_vector[shared_ptr[CGandivaExpression]] c_children shared_ptr[CProjector] result + if configuration is None: + configuration = Configuration() + for child in children: if child is None: raise TypeError("Expressions must not be None") @@ -620,12 +664,13 @@ cpdef make_projector(Schema schema, children, MemoryPool pool, check_status( Projector_Make(schema.sp_schema, c_children, _ensure_selection_mode(selection_mode), - CConfigurationBuilder.DefaultConfiguration(), + configuration.configuration, &result)) return Projector.create(result, pool) -cpdef make_filter(Schema schema, Condition condition): +cpdef make_filter(Schema schema, Condition condition, + Configuration configuration=None): """ Construct a filter based on a condition. 
@@ -638,6 +683,8 @@ cpdef make_filter(Schema schema, Condition condition): Schema for the record batches, and the condition. condition : pyarrow.gandiva.Condition Filter condition. + configuration : pyarrow.gandiva.Configuration, default None + Configuration for the filter. Returns ------- @@ -646,8 +693,12 @@ cpdef make_filter(Schema schema, Condition condition): cdef shared_ptr[CFilter] result if condition is None: raise TypeError("Condition must not be None") + + if configuration is None: + configuration = Configuration() + check_status( - Filter_Make(schema.sp_schema, condition.condition, &result)) + Filter_Make(schema.sp_schema, condition.condition, configuration.configuration, &result)) return Filter.create(result) diff --git a/python/pyarrow/includes/libgandiva.pxd b/python/pyarrow/includes/libgandiva.pxd index fa3b72bad61be..7d76576bef2b9 100644 --- a/python/pyarrow/includes/libgandiva.pxd +++ b/python/pyarrow/includes/libgandiva.pxd @@ -252,6 +252,7 @@ cdef extern from "gandiva/filter.h" namespace "gandiva" nogil: cdef CStatus Filter_Make \ "gandiva::Filter::Make"( shared_ptr[CSchema] schema, shared_ptr[CCondition] condition, + shared_ptr[CConfiguration] configuration, shared_ptr[CFilter]* filter) cdef extern from "gandiva/function_signature.h" namespace "gandiva" nogil: @@ -278,9 +279,20 @@ cdef extern from "gandiva/expression_registry.h" namespace "gandiva" nogil: cdef extern from "gandiva/configuration.h" namespace "gandiva" nogil: cdef cppclass CConfiguration" gandiva::Configuration": - pass + + CConfiguration() + + CConfiguration(bint optimize, bint dump_ir) + + void set_optimize(bint optimize) + + void set_dump_ir(bint dump_ir) cdef cppclass CConfigurationBuilder \ " gandiva::ConfigurationBuilder": @staticmethod shared_ptr[CConfiguration] DefaultConfiguration() + + CConfigurationBuilder() + + shared_ptr[CConfiguration] build() diff --git a/python/pyarrow/tests/test_gandiva.py b/python/pyarrow/tests/test_gandiva.py index 241cac4d83db4..80d119a48530d 100644 --- a/python/pyarrow/tests/test_gandiva.py +++ b/python/pyarrow/tests/test_gandiva.py @@ -47,8 +47,9 @@ def test_tree_exp_builder(): assert expr.result().type == pa.int32() + config = gandiva.Configuration(dump_ir=True) projector = gandiva.make_projector( - schema, [expr], pa.default_memory_pool()) + schema, [expr], pa.default_memory_pool(), "NONE", config) # Gandiva generates compute kernel function named `@expr_X` assert projector.llvm_ir.find("@expr_") != -1 @@ -104,7 +105,8 @@ def test_filter(): assert condition.result().type == pa.bool_() - filter = gandiva.make_filter(table.schema, condition) + config = gandiva.Configuration(dump_ir=True) + filter = gandiva.make_filter(table.schema, condition, config) # Gandiva generates compute kernel function named `@expr_X` assert filter.llvm_ir.find("@expr_") != -1 From 6c3972651e2dfa874f9bc38791de329bcdd78ecd Mon Sep 17 00:00:00 2001 From: Tammy DiPrima Date: Thu, 4 Jan 2024 16:18:22 -0500 Subject: [PATCH 130/570] GH-39114: [JS] Fix Example Code (#39442) --- js/examples/read_file.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/examples/read_file.html b/js/examples/read_file.html index 1013fbe79ef65..cd4d58f542756 100644 --- a/js/examples/read_file.html +++ b/js/examples/read_file.html @@ -41,7 +41,7 @@ } reader.onload = function (evt) { - var arrowTable = Arrow.Table.from([new Uint8Array(evt.target.result)]); + var arrowTable = Arrow.tableFromIPC(evt.target.result); var thead = document.getElementById("thead"); var tbody = 
document.getElementById("tbody"); From 7b0c6f955675c9ad309afc5f82da1623f9b13a59 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 4 Jan 2024 21:04:25 -0300 Subject: [PATCH 131/570] GH-39384: [C++] Disable -Werror=attributes for Azure SDK's identity.hpp (#39448) ### Rationale for this change Warnings in included headers are causing -Werror builds to fail. ### What changes are included in this PR? Push and pop of ignore warning pragmas. ### Are these changes tested? I'm asking @ anjakefala to test the build on this branch. * Closes: #39384 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/filesystem/azurefs.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 21350a490411a..029e19bc0e32a 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -18,7 +18,16 @@ #include "arrow/filesystem/azurefs.h" #include "arrow/filesystem/azurefs_internal.h" +// idenfity.hpp triggers -Wattributes warnings cause -Werror builds to fail, +// so disable it for this file with pragmas. +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wattributes" +#endif #include +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif #include #include From bec03856799a69bf0e6d4419ab7bc565afd070fe Mon Sep 17 00:00:00 2001 From: Jinpeng Date: Thu, 4 Jan 2024 21:41:01 -0500 Subject: [PATCH 132/570] PARQUET-2411: [C++][Parquet] Allow reading dictionary without reading data via ByteArrayDictionaryRecordReader (#39153) ### Rationale for this change This proposes an API to read only the dictionary from ByteArrayDictionaryRecordReader, enabling possible uses cases where the caller just want to check the dictionary content. ### What changes are included in this PR? New APIs to enable reading dictionary with RecordReader. ### Are these changes tested? Unit tests. ### Are there any user-facing changes? New APIs without breaking existing workflow. Authored-by: jp0317 Signed-off-by: mwish --- cpp/src/parquet/column_reader.cc | 20 +++++ cpp/src/parquet/column_reader.h | 10 +++ cpp/src/parquet/file_reader.cc | 79 +++++++++++-------- cpp/src/parquet/file_reader.h | 15 +++- cpp/src/parquet/reader_test.cc | 127 +++++++++++++++++++++++++++++++ 5 files changed, 217 insertions(+), 34 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index a49e58afbdb83..99978e283b46a 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -1370,6 +1370,26 @@ class TypedRecordReader : public TypedColumnReaderImpl, return bytes_for_values; } + const void* ReadDictionary(int32_t* dictionary_length) override { + if (this->current_decoder_ == nullptr && !this->HasNextInternal()) { + dictionary_length = 0; + return nullptr; + } + // Verify the current data page is dictionary encoded. The current_encoding_ should + // have been set as RLE_DICTIONARY if the page encoding is RLE_DICTIONARY or + // PLAIN_DICTIONARY. + if (this->current_encoding_ != Encoding::RLE_DICTIONARY) { + std::stringstream ss; + ss << "Data page is not dictionary encoded. 
Encoding: " + << EncodingToString(this->current_encoding_); + throw ParquetException(ss.str()); + } + auto decoder = dynamic_cast*>(this->current_decoder_); + const T* dictionary = nullptr; + decoder->GetDictionary(&dictionary, dictionary_length); + return reinterpret_cast(dictionary); + } + int64_t ReadRecords(int64_t num_records) override { if (num_records == 0) return 0; // Delimit records, then read values at the end diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h index 334b8bcffe0b8..086f6c0e55806 100644 --- a/cpp/src/parquet/column_reader.h +++ b/cpp/src/parquet/column_reader.h @@ -368,6 +368,16 @@ class PARQUET_EXPORT RecordReader { virtual void DebugPrintState() = 0; + /// \brief Returns the dictionary owned by the current decoder. Throws an + /// exception if the current decoder is not for dictionary encoding. The caller is + /// responsible for casting the returned pointer to proper type depending on the + /// column's physical type. An example: + /// const ByteArray* dict = reinterpret_cast(ReadDictionary(&len)); + /// or: + /// const float* dict = reinterpret_cast(ReadDictionary(&len)); + /// \param[out] dictionary_length The number of dictionary entries. + virtual const void* ReadDictionary(int32_t* dictionary_length) = 0; + /// \brief Decoded definition levels int16_t* def_levels() const { return reinterpret_cast(def_levels_->mutable_data()); diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc index 1d972b78fb99c..b3dd1d6054ac8 100644 --- a/cpp/src/parquet/file_reader.cc +++ b/cpp/src/parquet/file_reader.cc @@ -54,6 +54,36 @@ using arrow::internal::AddWithOverflow; namespace parquet { +namespace { +bool IsColumnChunkFullyDictionaryEncoded(const ColumnChunkMetaData& col) { + // Check the encoding_stats to see if all data pages are dictionary encoded. + const std::vector& encoding_stats = col.encoding_stats(); + if (encoding_stats.empty()) { + // Some parquet files may have empty encoding_stats. In this case we are + // not sure whether all data pages are dictionary encoded. + return false; + } + // The 1st page should be the dictionary page. + if (encoding_stats[0].page_type != PageType::DICTIONARY_PAGE || + (encoding_stats[0].encoding != Encoding::PLAIN && + encoding_stats[0].encoding != Encoding::PLAIN_DICTIONARY)) { + return false; + } + // The following pages should be dictionary encoded data pages. + for (size_t idx = 1; idx < encoding_stats.size(); ++idx) { + if ((encoding_stats[idx].encoding != Encoding::RLE_DICTIONARY && + encoding_stats[idx].encoding != Encoding::PLAIN_DICTIONARY) || + (encoding_stats[idx].page_type != PageType::DATA_PAGE && + encoding_stats[idx].page_type != PageType::DATA_PAGE_V2)) { + // Return false if any following page is not a dictionary encoded data + // page. 
+ return false; + } + } + return true; +} +} // namespace + // PARQUET-978: Minimize footer reads by reading 64 KB from the end of the file static constexpr int64_t kDefaultFooterReadSize = 64 * 1024; static constexpr uint32_t kFooterSize = 8; @@ -82,7 +112,8 @@ std::shared_ptr RowGroupReader::Column(int i) { const_cast(contents_->properties())->memory_pool()); } -std::shared_ptr RowGroupReader::RecordReader(int i) { +std::shared_ptr RowGroupReader::RecordReader( + int i, bool read_dictionary) { if (i >= metadata()->num_columns()) { std::stringstream ss; ss << "Trying to read column index " << i << " but row group metadata has only " @@ -96,8 +127,8 @@ std::shared_ptr RowGroupReader::RecordReader(int i) { internal::LevelInfo level_info = internal::LevelInfo::ComputeLevelInfo(descr); auto reader = internal::RecordReader::Make( - descr, level_info, contents_->properties()->memory_pool(), - /* read_dictionary = */ false, contents_->properties()->read_dense_for_nullable()); + descr, level_info, contents_->properties()->memory_pool(), read_dictionary, + contents_->properties()->read_dense_for_nullable()); reader->SetPageReader(std::move(page_reader)); return reader; } @@ -106,41 +137,23 @@ std::shared_ptr RowGroupReader::ColumnWithExposeEncoding( int i, ExposedEncoding encoding_to_expose) { std::shared_ptr reader = Column(i); - if (encoding_to_expose == ExposedEncoding::DICTIONARY) { - // Check the encoding_stats to see if all data pages are dictionary encoded. - std::unique_ptr col = metadata()->ColumnChunk(i); - const std::vector& encoding_stats = col->encoding_stats(); - if (encoding_stats.empty()) { - // Some parquet files may have empty encoding_stats. In this case we are - // not sure whether all data pages are dictionary encoded. So we do not - // enable exposing dictionary. - return reader; - } - // The 1st page should be the dictionary page. - if (encoding_stats[0].page_type != PageType::DICTIONARY_PAGE || - (encoding_stats[0].encoding != Encoding::PLAIN && - encoding_stats[0].encoding != Encoding::PLAIN_DICTIONARY)) { - return reader; - } - // The following pages should be dictionary encoded data pages. - for (size_t idx = 1; idx < encoding_stats.size(); ++idx) { - if ((encoding_stats[idx].encoding != Encoding::RLE_DICTIONARY && - encoding_stats[idx].encoding != Encoding::PLAIN_DICTIONARY) || - (encoding_stats[idx].page_type != PageType::DATA_PAGE && - encoding_stats[idx].page_type != PageType::DATA_PAGE_V2)) { - return reader; - } - } - } else { - // Exposing other encodings are not supported for now. - return reader; + if (encoding_to_expose == ExposedEncoding::DICTIONARY && + IsColumnChunkFullyDictionaryEncoded(*metadata()->ColumnChunk(i))) { + // Set exposed encoding. + reader->SetExposedEncoding(encoding_to_expose); } - // Set exposed encoding. 
- reader->SetExposedEncoding(encoding_to_expose); return reader; } +std::shared_ptr RowGroupReader::RecordReaderWithExposeEncoding( + int i, ExposedEncoding encoding_to_expose) { + return RecordReader( + i, + /*read_dictionary=*/encoding_to_expose == ExposedEncoding::DICTIONARY && + IsColumnChunkFullyDictionaryEncoded(*metadata()->ColumnChunk(i))); +} + std::unique_ptr RowGroupReader::GetColumnPageReader(int i) { if (i >= metadata()->num_columns()) { std::stringstream ss; diff --git a/cpp/src/parquet/file_reader.h b/cpp/src/parquet/file_reader.h index da85b73fc2dfe..b59b59f95c2d8 100644 --- a/cpp/src/parquet/file_reader.h +++ b/cpp/src/parquet/file_reader.h @@ -64,7 +64,8 @@ class PARQUET_EXPORT RowGroupReader { // EXPERIMENTAL: Construct a RecordReader for the indicated column of the row group. // Ownership is shared with the RowGroupReader. - std::shared_ptr RecordReader(int i); + std::shared_ptr RecordReader(int i, + bool read_dictionary = false); // Construct a ColumnReader, trying to enable exposed encoding. // @@ -80,6 +81,18 @@ class PARQUET_EXPORT RowGroupReader { std::shared_ptr ColumnWithExposeEncoding( int i, ExposedEncoding encoding_to_expose); + // Construct a RecordReader, trying to enable exposed encoding. + // + // For dictionary encoding, currently we only support column chunks that are + // fully dictionary encoded byte arrays. The caller should verify if the reader can read + // and expose the dictionary by checking the reader's read_dictionary(). If a column + // chunk uses dictionary encoding but then falls back to plain encoding, the returned + // reader will read decoded data without exposing the dictionary. + // + // \note API EXPERIMENTAL + std::shared_ptr RecordReaderWithExposeEncoding( + int i, ExposedEncoding encoding_to_expose); + std::unique_ptr GetColumnPageReader(int i); private: diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index 5223158e5f4f9..2c2b62f5d12f6 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -542,6 +542,83 @@ TEST(TestFileReader, GetRecordReader) { ASSERT_EQ(8, col_record_reader_->levels_written()); } +TEST(TestFileReader, RecordReaderWithExposingDictionary) { + const int num_rows = 1000; + + // Make schema + schema::NodeVector fields; + fields.push_back(PrimitiveNode::Make("field", Repetition::REQUIRED, Type::BYTE_ARRAY, + ConvertedType::NONE)); + auto schema = std::static_pointer_cast( + GroupNode::Make("schema", Repetition::REQUIRED, fields)); + + // Write small batches and small data pages + std::shared_ptr writer_props = WriterProperties::Builder() + .write_batch_size(64) + ->data_pagesize(128) + ->enable_dictionary() + ->build(); + + ASSERT_OK_AND_ASSIGN(auto out_file, ::arrow::io::BufferOutputStream::Create()); + std::shared_ptr file_writer = + ParquetFileWriter::Open(out_file, schema, writer_props); + + RowGroupWriter* rg_writer = file_writer->AppendRowGroup(); + + // write one column + ::arrow::random::RandomArrayGenerator rag(0); + ByteArrayWriter* writer = static_cast(rg_writer->NextColumn()); + std::vector raw_unique_data = {"a", "bc", "defg"}; + std::vector col_typed; + for (int i = 0; i < num_rows; i++) { + std::string_view chosed_data = raw_unique_data[i % raw_unique_data.size()]; + col_typed.emplace_back(chosed_data); + } + writer->WriteBatch(num_rows, nullptr, nullptr, col_typed.data()); + rg_writer->Close(); + file_writer->Close(); + + // Open the reader + ASSERT_OK_AND_ASSIGN(auto file_buf, out_file->Finish()); + auto in_file = 
std::make_shared<::arrow::io::BufferReader>(file_buf); + + ReaderProperties reader_props; + reader_props.enable_buffered_stream(); + reader_props.set_buffer_size(64); + std::unique_ptr file_reader = + ParquetFileReader::Open(in_file, reader_props); + + auto row_group = file_reader->RowGroup(0); + auto record_reader = std::dynamic_pointer_cast( + row_group->RecordReaderWithExposeEncoding(0, ExposedEncoding::DICTIONARY)); + ASSERT_NE(record_reader, nullptr); + ASSERT_TRUE(record_reader->read_dictionary()); + + int32_t dict_len = 0; + auto dict = + reinterpret_cast(record_reader->ReadDictionary(&dict_len)); + ASSERT_NE(dict, nullptr); + ASSERT_EQ(dict_len, raw_unique_data.size()); + ASSERT_EQ(record_reader->ReadRecords(num_rows), num_rows); + std::shared_ptr<::arrow::ChunkedArray> result_array = record_reader->GetResult(); + ASSERT_EQ(result_array->num_chunks(), 1); + const std::shared_ptr<::arrow::Array> chunk = result_array->chunk(0); + auto dictionary_array = std::dynamic_pointer_cast<::arrow::DictionaryArray>(chunk); + const int32_t* indices = + (std::dynamic_pointer_cast<::arrow::Int32Array>(dictionary_array->indices())) + ->raw_values(); + + // Verify values based on the dictionary from ReadDictionary(). + int64_t indices_read = chunk->length(); + ASSERT_EQ(indices_read, num_rows); + for (int i = 0; i < indices_read; ++i) { + ASSERT_LT(indices[i], dict_len); + ASSERT_EQ(std::string_view(reinterpret_cast(dict[indices[i]].ptr), + dict[indices[i]].len), + col_typed[i]); + } +} + class TestLocalFile : public ::testing::Test { public: void SetUp() { @@ -1064,6 +1141,56 @@ TEST(TestFileReader, BufferedReadsWithDictionary) { } } +TEST(TestFileReader, PartiallyDictionaryEncodingNotExposed) { + const int num_rows = 1000; + + // Make schema + schema::NodeVector fields; + fields.push_back(PrimitiveNode::Make("field", Repetition::REQUIRED, Type::DOUBLE, + ConvertedType::NONE)); + auto schema = std::static_pointer_cast( + GroupNode::Make("schema", Repetition::REQUIRED, fields)); + + // Write small batches and small data pages. Explicitly set the dictionary page size + // limit such that the column chunk will not be fully dictionary encoded. 
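  // With the tiny dictionary page size limit above, the writer falls back to
  // plain-encoded data pages partway through the column chunk, so the chunk is
  // not fully dictionary encoded and the reader below is expected to keep its
  // default (non-DICTIONARY) exposed encoding.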
+ std::shared_ptr writer_props = WriterProperties::Builder() + .write_batch_size(64) + ->data_pagesize(128) + ->enable_dictionary() + ->dictionary_pagesize_limit(4) + ->build(); + + ASSERT_OK_AND_ASSIGN(auto out_file, ::arrow::io::BufferOutputStream::Create()); + std::shared_ptr file_writer = + ParquetFileWriter::Open(out_file, schema, writer_props); + + RowGroupWriter* rg_writer = file_writer->AppendRowGroup(); + + // write one column + ::arrow::random::RandomArrayGenerator rag(0); + DoubleWriter* writer = static_cast(rg_writer->NextColumn()); + std::shared_ptr<::arrow::Array> col = rag.Float64(num_rows, 0, 100); + const auto& col_typed = static_cast(*col); + writer->WriteBatch(num_rows, nullptr, nullptr, col_typed.raw_values()); + rg_writer->Close(); + file_writer->Close(); + + // Open the reader + ASSERT_OK_AND_ASSIGN(auto file_buf, out_file->Finish()); + auto in_file = std::make_shared<::arrow::io::BufferReader>(file_buf); + + ReaderProperties reader_props; + reader_props.enable_buffered_stream(); + reader_props.set_buffer_size(64); + std::unique_ptr file_reader = + ParquetFileReader::Open(in_file, reader_props); + + auto row_group = file_reader->RowGroup(0); + auto col_reader = std::static_pointer_cast( + row_group->ColumnWithExposeEncoding(0, ExposedEncoding::DICTIONARY)); + EXPECT_NE(col_reader->GetExposedEncoding(), ExposedEncoding::DICTIONARY); +} + TEST(TestFileReader, BufferedReads) { // PARQUET-1636: Buffered reads were broken before introduction of // RandomAccessFile::GetStream From 04d79846dc5fff606dd66407c5479e087185b35a Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 5 Jan 2024 23:04:31 +0900 Subject: [PATCH 133/570] GH-39433: [Ruby] Add support for Table.load(format: json) options (#39464) ### Rationale for this change Other `format:` such as `format: :csv` accepts custom options. `format: :json` should also accept them. ### What changes are included in this PR? Use `Arrow::JSONReadOptions` for `Table::Load(format: :json)`. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. * Closes: #39433 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- ruby/red-arrow/lib/arrow/table-loader.rb | 8 +++++++- ruby/red-arrow/test/helper.rb | 1 + ruby/red-arrow/test/test-table.rb | 25 ++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/ruby/red-arrow/lib/arrow/table-loader.rb b/ruby/red-arrow/lib/arrow/table-loader.rb index 308eb16a37ad0..450be3fbe09ff 100644 --- a/ruby/red-arrow/lib/arrow/table-loader.rb +++ b/ruby/red-arrow/lib/arrow/table-loader.rb @@ -252,7 +252,13 @@ def load_as_feather def load_as_json open_input_stream do |input| - reader = JSONReader.new(input) + options = JSONReadOptions.new + @options.each do |key, value| + next if value.nil? 
+ setter = :"#{key}=" + options.__send__(setter, value) if options.respond_to?(setter) + end + reader = JSONReader.new(input, options) table = reader.read table.refer_input(input) table diff --git a/ruby/red-arrow/test/helper.rb b/ruby/red-arrow/test/helper.rb index 7fa6764dd40c2..42732a5954a6d 100644 --- a/ruby/red-arrow/test/helper.rb +++ b/ruby/red-arrow/test/helper.rb @@ -18,6 +18,7 @@ require "arrow" require "fiddle" +require "json" require "pathname" require "tempfile" require "timeout" diff --git a/ruby/red-arrow/test/test-table.rb b/ruby/red-arrow/test/test-table.rb index 7c372bd44f14a..883cf70c269bb 100644 --- a/ruby/red-arrow/test/test-table.rb +++ b/ruby/red-arrow/test/test-table.rb @@ -677,6 +677,31 @@ def test_tsv format: :tsv, schema: @table.schema)) end + + def test_json + output = create_output(".json") + # TODO: Implement this. + # @table.save(output, format: :json) + columns = "" + @table.each_record.each do |record| + column = { + "count" => record.count, + "visible" => record.visible, + } + columns << column.to_json + columns << "\n" + end + if output.is_a?(String) + File.write(output, columns) + else + output.resize(columns.bytesize) + output.set_data(0, columns) + end + assert_equal(@table, + Arrow::Table.load(output, + format: :json, + schema: @table.schema)) + end end sub_test_case("path") do From 42b995b4f8de239da2be17430706cf4eb795ac50 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Fri, 5 Jan 2024 20:49:07 +0530 Subject: [PATCH 134/570] MINOR: [Java] Bump com.google.errorprone:error_prone_core from 2.4.0 to 2.24.0 in /java (#39452) ### Rationale for this change This is a draft PR for fixing the dependabot PR https://github.com/apache/arrow/pull/39409 ### What changes are included in this PR? Upgrading `com.google.errorprone` to 2.24.0 for JDK11+ and restricting `com.google.errorprone` to 2.10 to JDK8. ### Are these changes tested? N/A. CIs are implicitly testing this including existing test cases. ### Are there any user-facing changes? 
No Lead-authored-by: vibhatha Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/pom.xml | 4 ++-- .../apache/arrow/vector/complex/writer/TestComplexWriter.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/java/pom.xml b/java/pom.xml index 522ee4abc7669..fae072018eb19 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -44,7 +44,7 @@ 2 true 9+181-r4173-1 - 2.22.0 + 2.24.0 3.11.0 5.5.0 5.2.0 @@ -844,7 +844,7 @@ com.google.errorprone error_prone_core - 2.4.0 + 2.10.0 diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index 96d39e85f1f4a..e03ce0c056bf1 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -773,7 +773,7 @@ public void simpleUnion() throws Exception { for (int i = 0; i < COUNT; i++) { unionReader.setPosition(i); if (i % 5 == 0) { - Assert.assertEquals(i, i, unionReader.readInteger()); + Assert.assertEquals(i, unionReader.readInteger().intValue()); } else if (i % 5 == 1) { NullableTimeStampMilliTZHolder holder = new NullableTimeStampMilliTZHolder(); unionReader.read(holder); From aae6fa40b458a90c598df281fdc8fc023e05a262 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Fri, 5 Jan 2024 12:44:45 -0300 Subject: [PATCH 135/570] GH-38772: [C++] Implement directory semantics even when the storage account doesn't support HNS (#39361) ### Rationale for this change The `FileSystem` implementation based on Azure Blob Storage should implement directory operations according to filesystem semantics. When Hierarchical Namespace (HNS) is enabled, we can rely on Azure Data Lake Storage Gen 2 APIs implementing the filesystem semantics for us, but when all we have is the Blobs API, we should emulate it. ### What changes are included in this PR? - Skip fewer tests - Re-implement `GetFileInfo` using `ListBlobsByHierarchy` instead of `ListBlobs` - Re-implement `CreateDir` with an upfront HNS support check instead of falling back to Blobs API after an error - Add comprehensive tests to `CreateDir` - Add `HasSubmitBatchBug` to check if a test inside any scenario is affected by a certain Azurite issue - Implement `DeleteDir` to work properly on flat namespace storage accounts (non-HNS accounts) - ### Are these changes tested? Yes. By existing and new tests added by this PR itself. 
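For illustration, a minimal standalone sketch of the flat-namespace probe that the reworked `GetFileInfo` performs via `ListBlobsByHierarchy` (a returned blob prefix equal to `path + "/"` implies a directory; a blob named exactly `path` is a file). The connection string and container name are placeholders and error handling is omitted.

#include <azure/storage/blobs.hpp>
#include <iostream>
#include <string>

namespace Blobs = Azure::Storage::Blobs;

// Classify `path` on a flat-namespace (non-HNS) storage account using a single
// hierarchical listing with PageSizeHint=1, mirroring the logic added below.
std::string ClassifyPath(const Blobs::BlobContainerClient& container, std::string path) {
  while (!path.empty() && path.back() == '/') path.pop_back();  // strip trailing slash
  Blobs::ListBlobsOptions options;
  options.Prefix = path;
  options.PageSizeHint = 1;
  auto page = container.ListBlobsByHierarchy("/", options);
  // A prefix that matches "path/" exactly means at least one blob lives under
  // that prefix, i.e. an implied (or marker) directory.
  if (!page.BlobPrefixes.empty() && page.BlobPrefixes[0] == path + "/") {
    return "directory";
  }
  // A blob named exactly `path` is a regular file.
  if (!page.Blobs.empty() && page.Blobs[0].Name == path) {
    return "file";
  }
  return "not found";
}

int main() {
  // Placeholder credentials; Azurite's development connection string also works.
  const std::string conn = "<storage connection string>";
  auto container =
      Blobs::BlobContainerClient::CreateFromConnectionString(conn, "my-container");
  std::cout << ClassifyPath(container, "some/dir/") << std::endl;
  return 0;
}
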
* Closes: #38772 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/filesystem/azurefs.cc | 709 +++++++++++++------- cpp/src/arrow/filesystem/azurefs_internal.h | 2 +- cpp/src/arrow/filesystem/azurefs_test.cc | 444 +++++++----- 3 files changed, 731 insertions(+), 424 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 029e19bc0e32a..9569eff2e47ed 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -828,7 +828,7 @@ bool IsDfsEmulator(const AzureOptions& options) { namespace internal { Result CheckIfHierarchicalNamespaceIsEnabled( - DataLake::DataLakeFileSystemClient& adlfs_client, const AzureOptions& options) { + const DataLake::DataLakeFileSystemClient& adlfs_client, const AzureOptions& options) { try { auto directory_client = adlfs_client.GetDirectoryClient(""); // GetAccessControlList will fail on storage accounts @@ -891,10 +891,12 @@ namespace { const char kDelimiter[] = {internal::kSep, '\0'}; +/// \pre location.container is not empty. template -Result GetContainerPropsAsFileInfo(const std::string& container_name, - ContainerClient& container_client) { - FileInfo info{container_name}; +Result GetContainerPropsAsFileInfo(const AzureLocation& location, + const ContainerClient& container_client) { + DCHECK(!location.container.empty()); + FileInfo info{location.path.empty() ? location.all : location.container}; try { auto properties = container_client.GetProperties(); info.set_type(FileType::Directory); @@ -910,6 +912,18 @@ Result GetContainerPropsAsFileInfo(const std::string& container_name, } } +template +Status CreateContainerIfNotExists(const std::string& container_name, + const ContainerClient& container_client) { + try { + container_client.CreateIfNotExists(); + return Status::OK(); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus(exception, "Failed to create a container: ", container_name, + ": ", container_client.GetUrl()); + } +} + FileInfo DirectoryFileInfoFromPath(std::string_view path) { return FileInfo{std::string{internal::RemoveTrailingSlash(path)}, FileType::Directory}; } @@ -955,12 +969,21 @@ class AzureFileSystem::Impl { io::IOContext& io_context() { return io_context_; } const AzureOptions& options() const { return options_; } - private: + Blobs::BlobContainerClient GetBlobContainerClient(const std::string& container_name) { + return blob_service_client_->GetBlobContainerClient(container_name); + } + + /// \param container_name Also known as "filesystem" in the ADLS Gen2 API. + DataLake::DataLakeFileSystemClient GetFileSystemClient( + const std::string& container_name) { + return datalake_service_client_->GetFileSystemClient(container_name); + } + /// \brief Memoized version of CheckIfHierarchicalNamespaceIsEnabled. /// /// \return kEnabled/kDisabled/kContainerNotFound (kUnknown is never returned). Result HierarchicalNamespaceSupport( - DataLake::DataLakeFileSystemClient& adlfs_client) { + const DataLake::DataLakeFileSystemClient& adlfs_client) { switch (cached_hns_support_) { case HNSSupport::kEnabled: case HNSSupport::kDisabled: @@ -987,7 +1010,6 @@ class AzureFileSystem::Impl { return hns_support; } - public: /// This is used from unit tests to ensure we perform operations on all the /// possible states of cached_hns_support_. 
void ForceCachedHierarchicalNamespaceSupport(int support) { @@ -1004,33 +1026,20 @@ class AzureFileSystem::Impl { DCHECK(false) << "Invalid enum HierarchicalNamespaceSupport value."; } - Result GetFileInfo(const AzureLocation& location) { - if (location.container.empty()) { - DCHECK(location.path.empty()); - // Root directory of the storage account. - return FileInfo{"", FileType::Directory}; - } - if (location.path.empty()) { - // We have a container, but no path within the container. - // The container itself represents a directory. - auto container_client = - blob_service_client_->GetBlobContainerClient(location.container); - return GetContainerPropsAsFileInfo(location.container, container_client); - } - // There is a path to search within the container. - FileInfo info{location.all}; - auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container); + /// \pre location.path is not empty. + Result GetFileInfo(const DataLake::DataLakeFileSystemClient& adlfs_client, + const AzureLocation& location) { auto file_client = adlfs_client.GetFileClient(location.path); try { + FileInfo info{location.all}; auto properties = file_client.GetProperties(); if (properties.Value.IsDirectory) { info.set_type(FileType::Directory); } else if (internal::HasTrailingSlash(location.path)) { - // For a path with a trailing slash a hierarchical namespace may return a blob - // with that trailing slash removed. For consistency with flat namespace and - // other filesystems we chose to return NotFound. - // - // NOTE(felipecrv): could this be an empty directory marker? + // For a path with a trailing slash, a Hierarchical Namespace storage account + // may recognize a file (path with trailing slash removed). For consistency + // with other arrow::FileSystem implementations we chose to return NotFound + // because the trailing slash means the user was looking for a directory. info.set_type(FileType::NotFound); return info; } else { @@ -1042,47 +1051,88 @@ class AzureFileSystem::Impl { return info; } catch (const Storage::StorageException& exception) { if (exception.StatusCode == Http::HttpStatusCode::NotFound) { - ARROW_ASSIGN_OR_RAISE(auto hns_support, - HierarchicalNamespaceSupport(adlfs_client)); - if (hns_support == HNSSupport::kContainerNotFound || - hns_support == HNSSupport::kEnabled) { - // If the hierarchical namespace is enabled, then the storage account will - // have explicit directories. Neither a file nor a directory was found. - info.set_type(FileType::NotFound); + return FileInfo{location.all, FileType::NotFound}; + } + return ExceptionToStatus( + exception, "GetProperties for '", file_client.GetUrl(), + "' failed. GetFileInfo is unable to determine whether the path exists."); + } + } + + /// On flat namespace accounts there are no real directories. Directories are + /// implied by empty directory marker blobs with names ending in "/" or there + /// being blobs with names starting with the directory path. + /// + /// \pre location.path is not empty. + Result GetFileInfo(const Blobs::BlobContainerClient& container_client, + const AzureLocation& location) { + DCHECK(!location.path.empty()); + Blobs::ListBlobsOptions options; + options.Prefix = internal::RemoveTrailingSlash(location.path); + options.PageSizeHint = 1; + + try { + FileInfo info{location.all}; + auto list_response = container_client.ListBlobsByHierarchy(kDelimiter, options); + // Since PageSizeHint=1, we expect at most one entry in either Blobs or + // BlobPrefixes. 
A BlobPrefix always ends with kDelimiter ("/"), so we can + // distinguish between a directory and a file by checking if we received a + // prefix or a blob. + if (!list_response.BlobPrefixes.empty()) { + // Ensure the returned BlobPrefixes[0] string doesn't contain more characters than + // the requested Prefix. For instance, if we request with Prefix="dir/abra" and + // the container contains "dir/abracadabra/" but not "dir/abra/", we will get back + // "dir/abracadabra/" in the BlobPrefixes list. If "dir/abra/" existed, + // it would be returned instead because it comes before "dir/abracadabra/" in the + // lexicographic order guaranteed by ListBlobsByHierarchy. + const auto& blob_prefix = list_response.BlobPrefixes[0]; + if (blob_prefix == internal::EnsureTrailingSlash(location.path)) { + info.set_type(FileType::Directory); return info; } - // On flat namespace accounts there are no real directories. Directories are only - // implied by using `/` in the blob name. - Blobs::ListBlobsOptions list_blob_options; - // If listing the prefix `path.path_to_file` with trailing slash returns at least - // one result then `path` refers to an implied directory. - list_blob_options.Prefix = internal::EnsureTrailingSlash(location.path); - // We only need to know if there is at least one result, so minimise page size - // for efficiency. - list_blob_options.PageSizeHint = 1; - - try { - auto paged_list_result = - blob_service_client_->GetBlobContainerClient(location.container) - .ListBlobs(list_blob_options); - auto file_type = paged_list_result.Blobs.size() > 0 ? FileType::Directory - : FileType::NotFound; - info.set_type(file_type); + } + if (!list_response.Blobs.empty()) { + const auto& blob = list_response.Blobs[0]; + if (blob.Name == location.path) { + info.set_type(FileType::File); + info.set_size(blob.BlobSize); + info.set_mtime( + std::chrono::system_clock::time_point{blob.Details.LastModified}); return info; - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus( - exception, "ListBlobs failed for prefix='", *list_blob_options.Prefix, - "' failed. GetFileInfo is unable to determine whether the path should " - "be considered an implied directory."); } } + info.set_type(FileType::NotFound); + return info; + } catch (const Storage::StorageException& exception) { + if (IsContainerNotFound(exception)) { + return FileInfo{location.all, FileType::NotFound}; + } return ExceptionToStatus( - exception, "GetProperties failed for '", file_client.GetUrl(), - "' GetFileInfo is unable to determine whether the path exists."); + exception, "ListBlobsByHierarchy failed for prefix='", *options.Prefix, + "'. GetFileInfo is unable to determine whether the path exists."); } } private: + /// \pref location.container is not empty. 
+ template + Status CheckDirExists(const ContainerClient& container_client, + const AzureLocation& location) { + DCHECK(!location.container.empty()); + FileInfo info; + if (location.path.empty()) { + ARROW_ASSIGN_OR_RAISE(info, + GetContainerPropsAsFileInfo(location, container_client)); + } else { + ARROW_ASSIGN_OR_RAISE(info, GetFileInfo(container_client, location)); + } + if (info.type() == FileType::NotFound) { + return PathNotFound(location); + } + DCHECK_EQ(info.type(), FileType::Directory); + return Status::OK(); + } + template Status VisitContainers(const Core::Context& context, OnContainer&& on_container) const { Blobs::ListBlobContainersOptions options; @@ -1297,97 +1347,79 @@ class AzureFileSystem::Impl { return ptr; } - Status CreateDir(const AzureLocation& location) { - if (location.container.empty()) { - return Status::Invalid("CreateDir requires a non-empty path."); - } - - auto container_client = - blob_service_client_->GetBlobContainerClient(location.container); - if (location.path.empty()) { - try { - auto response = container_client.Create(); - return response.Value.Created - ? Status::OK() - : Status::AlreadyExists("Directory already exists: " + location.all); - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus(exception, - "Failed to create a container: ", location.container, - ": ", container_client.GetUrl()); - } - } - - auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container); - ARROW_ASSIGN_OR_RAISE(auto hns_support, HierarchicalNamespaceSupport(adlfs_client)); - if (hns_support == HNSSupport::kContainerNotFound) { - return PathNotFound(location); - } - if (hns_support == HNSSupport::kDisabled) { - ARROW_ASSIGN_OR_RAISE( - auto container_info, - GetContainerPropsAsFileInfo(location.container, container_client)); - if (container_info.type() == FileType::NotFound) { - return PathNotFound(location); - } - // Without hierarchical namespace enabled Azure blob storage has no directories. - // Therefore we can't, and don't need to create one. Simply creating a blob with `/` - // in the name implies directories. - return Status::OK(); - } - - auto directory_client = adlfs_client.GetDirectoryClient(location.path); - try { - auto response = directory_client.Create(); - if (response.Value.Created) { - return Status::OK(); - } else { - return StatusFromErrorResponse(directory_client.GetUrl(), *response.RawResponse, - "Failed to create a directory: " + location.path); + private: + /// This function cannot assume the filesystem/container already exists. + /// + /// \pre location.container is not empty. + /// \pre location.path is not empty. + template + Status CreateDirTemplate(const ContainerClient& container_client, + CreateDirIfNotExists&& create_if_not_exists, + const AzureLocation& location, bool recursive) { + DCHECK(!location.container.empty()); + DCHECK(!location.path.empty()); + // Non-recursive CreateDir calls require the parent directory to exist. + if (!recursive) { + auto parent = location.parent(); + if (!parent.path.empty()) { + RETURN_NOT_OK(CheckDirExists(container_client, parent)); } - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus(exception, "Failed to create a directory: ", location.path, - ": ", directory_client.GetUrl()); + // If the parent location is just the container, we don't need to check if it + // exists because the operation we perform below will fail if the container + // doesn't exist and we can handle that error according to the recursive flag. 
} - } - - Status CreateDirRecursive(const AzureLocation& location) { - if (location.container.empty()) { - return Status::Invalid("CreateDir requires a non-empty path."); - } - - auto container_client = - blob_service_client_->GetBlobContainerClient(location.container); try { - container_client.CreateIfNotExists(); - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus(exception, - "Failed to create a container: ", location.container, " (", - container_client.GetUrl(), ")"); - } - - auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container); - ARROW_ASSIGN_OR_RAISE(auto hns_support, HierarchicalNamespaceSupport(adlfs_client)); - if (hns_support == HNSSupport::kDisabled) { - // Without hierarchical namespace enabled Azure blob storage has no directories. - // Therefore we can't, and don't need to create one. Simply creating a blob with `/` - // in the name implies directories. + create_if_not_exists(container_client, location); return Status::OK(); - } - // Don't handle HNSSupport::kContainerNotFound, just assume it still exists (because - // it was created above) and try to create the directory. - - if (!location.path.empty()) { - auto directory_client = adlfs_client.GetDirectoryClient(location.path); - try { - directory_client.CreateIfNotExists(); - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus(exception, - "Failed to create a directory: ", location.path, " (", - directory_client.GetUrl(), ")"); + } catch (const Storage::StorageException& exception) { + if (IsContainerNotFound(exception)) { + try { + if (recursive) { + container_client.CreateIfNotExists(); + create_if_not_exists(container_client, location); + return Status::OK(); + } else { + auto parent = location.parent(); + return PathNotFound(parent); + } + } catch (const Storage::StorageException& second_exception) { + return ExceptionToStatus(second_exception, "Failed to create directory '", + location.all, "': ", container_client.GetUrl()); + } } + return ExceptionToStatus(exception, "Failed to create directory '", location.all, + "': ", container_client.GetUrl()); } + } - return Status::OK(); + public: + /// This function cannot assume the filesystem already exists. + /// + /// \pre location.container is not empty. + /// \pre location.path is not empty. + Status CreateDirOnFileSystem(const DataLake::DataLakeFileSystemClient& adlfs_client, + const AzureLocation& location, bool recursive) { + return CreateDirTemplate( + adlfs_client, + [](const auto& adlfs_client, const auto& location) { + auto directory_client = adlfs_client.GetDirectoryClient(location.path); + directory_client.CreateIfNotExists(); + }, + location, recursive); + } + + /// This function cannot assume the container already exists. + /// + /// \pre location.container is not empty. + /// \pre location.path is not empty. 
+ Status CreateDirOnContainer(const Blobs::BlobContainerClient& container_client, + const AzureLocation& location, bool recursive) { + return CreateDirTemplate( + container_client, + [this](const auto& container_client, const auto& location) { + EnsureEmptyDirExistsImplThatThrows(container_client, location.path); + }, + location, recursive); } Result> OpenAppendStream( @@ -1414,10 +1446,92 @@ class AzureFileSystem::Impl { } private: - Status DeleteDirContentsWithoutHierarchicalNamespace(const AzureLocation& location, - bool missing_dir_ok) { - auto container_client = - blob_service_client_->GetBlobContainerClient(location.container); + void EnsureEmptyDirExistsImplThatThrows( + const Blobs::BlobContainerClient& container_client, + const std::string& path_within_container) { + auto dir_marker_blob_path = internal::EnsureTrailingSlash(path_within_container); + auto block_blob_client = + container_client.GetBlobClient(dir_marker_blob_path).AsBlockBlobClient(); + // Attach metadata that other filesystem implementations expect to be present + // on directory marker blobs. + // https://github.com/fsspec/adlfs/blob/32132c4094350fca2680155a5c236f2e9f991ba5/adlfs/spec.py#L855-L870 + Blobs::UploadBlockBlobFromOptions blob_options; + blob_options.Metadata.emplace("is_directory", "true"); + block_blob_client.UploadFrom(nullptr, 0, blob_options); + } + + public: + /// This function assumes the container already exists. So it can only be + /// called after that has been verified. + /// + /// \pre location.container is not empty. + /// \pre The location.container container already exists. + Status EnsureEmptyDirExists(const Blobs::BlobContainerClient& container_client, + const AzureLocation& location, const char* operation_name) { + DCHECK(!location.container.empty()); + if (location.path.empty()) { + // Nothing to do. The container already exists per the preconditions. + return Status::OK(); + } + try { + EnsureEmptyDirExistsImplThatThrows(container_client, location.path); + return Status::OK(); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( + exception, operation_name, " failed to ensure empty directory marker '", + location.path, "' exists in container: ", container_client.GetUrl()); + } + } + + /// \pre location.container is not empty. + /// \pre location.path is empty. + Status DeleteContainer(const Blobs::BlobContainerClient& container_client, + const AzureLocation& location) { + DCHECK(!location.container.empty()); + DCHECK(location.path.empty()); + try { + auto response = container_client.Delete(); + if (response.Value.Deleted) { + return Status::OK(); + } else { + return StatusFromErrorResponse( + container_client.GetUrl(), *response.RawResponse, + "Failed to delete a container: " + location.container); + } + } catch (const Storage::StorageException& exception) { + if (IsContainerNotFound(exception)) { + return PathNotFound(location); + } + return ExceptionToStatus(exception, + "Failed to delete a container: ", location.container, ": ", + container_client.GetUrl()); + } + } + + /// Deletes contents of a directory and possibly the directory itself + /// depending on the value of preserve_dir_marker_blob. + /// + /// \pre location.container is not empty. + /// \pre preserve_dir_marker_blob=false implies location.path is not empty + /// because we can't *not preserve* the root directory of a container. + /// + /// \param require_dir_to_exist Require the directory to exist *before* this + /// operation, otherwise return PathNotFound. 
+ /// \param preserve_dir_marker_blob Ensure the empty directory marker blob + /// is preserved (not deleted) or created (before the contents are deleted) if it + /// doesn't exist explicitly but is implied by the existence of blobs with names + /// starting with the directory path. + /// \param operation_name Used in error messages to accurately describe the operation + Status DeleteDirContentsOnContainer(const Blobs::BlobContainerClient& container_client, + const AzureLocation& location, + bool require_dir_to_exist, + bool preserve_dir_marker_blob, + const char* operation_name) { + using DeleteBlobResponse = Storage::DeferredResponse; + DCHECK(!location.container.empty()); + DCHECK(preserve_dir_marker_blob || !location.path.empty()) + << "Must pass preserve_dir_marker_blob=true when location.path is empty " + "(i.e. deleting the contents of a container)."; Blobs::ListBlobsOptions options; if (!location.path.empty()) { options.Prefix = internal::EnsureTrailingSlash(location.path); @@ -1428,9 +1542,11 @@ class AzureFileSystem::Impl { // size of the body for a batch request can't exceed 4 MB. const int32_t kNumMaxRequestsInBatch = 256; options.PageSizeHint = kNumMaxRequestsInBatch; + // trusted only if preserve_dir_marker_blob is true. + bool found_dir_marker_blob = false; try { auto list_response = container_client.ListBlobs(options); - if (!missing_dir_ok && list_response.Blobs.empty()) { + if (require_dir_to_exist && list_response.Blobs.empty()) { return PathNotFound(location); } for (; list_response.HasPage(); list_response.MoveToNextPage()) { @@ -1438,20 +1554,44 @@ class AzureFileSystem::Impl { continue; } auto batch = container_client.CreateBatch(); - std::vector> - deferred_responses; + std::vector> deferred_responses; for (const auto& blob_item : list_response.Blobs) { - deferred_responses.push_back(batch.DeleteBlob(blob_item.Name)); + if (preserve_dir_marker_blob && !found_dir_marker_blob) { + const bool is_dir_marker_blob = + options.Prefix.HasValue() && blob_item.Name == *options.Prefix; + if (is_dir_marker_blob) { + // Skip deletion of the existing directory marker blob, + // but take note that it exists. + found_dir_marker_blob = true; + continue; + } + } + deferred_responses.emplace_back(blob_item.Name, + batch.DeleteBlob(blob_item.Name)); } try { - container_client.SubmitBatch(batch); + // Before submitting the batch deleting directory contents, ensure + // the empty directory marker blob exists. Doing this first, means that + // directory doesn't "stop existing" during the duration of the batch delete + // operation. + if (preserve_dir_marker_blob && !found_dir_marker_blob) { + // Only create an empty directory marker blob if the directory's + // existence is implied by the existence of blobs with names + // starting with the directory path. 
+ if (!deferred_responses.empty()) { + RETURN_NOT_OK( + EnsureEmptyDirExists(container_client, location, operation_name)); + } + } + if (!deferred_responses.empty()) { + container_client.SubmitBatch(batch); + } } catch (const Storage::StorageException& exception) { return ExceptionToStatus(exception, "Failed to delete blobs in a directory: ", location.path, ": ", container_client.GetUrl()); } std::vector failed_blob_names; - for (size_t i = 0; i < deferred_responses.size(); ++i) { - const auto& deferred_response = deferred_responses[i]; + for (auto& [blob_name_view, deferred_response] : deferred_responses) { bool success = true; try { auto delete_result = deferred_response.GetResponse(); @@ -1460,8 +1600,7 @@ class AzureFileSystem::Impl { success = false; } if (!success) { - const auto& blob_item = list_response.Blobs[i]; - failed_blob_names.push_back(blob_item.Name); + failed_blob_names.emplace_back(blob_name_view); } } if (!failed_blob_names.empty()) { @@ -1475,117 +1614,74 @@ class AzureFileSystem::Impl { } } } + return Status::OK(); } catch (const Storage::StorageException& exception) { return ExceptionToStatus(exception, "Failed to list blobs in a directory: ", location.path, ": ", container_client.GetUrl()); } - return Status::OK(); } - public: - Status DeleteDir(const AzureLocation& location) { - if (location.container.empty()) { - return Status::Invalid("DeleteDir requires a non-empty path."); - } - - auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container); - ARROW_ASSIGN_OR_RAISE(auto hns_support, HierarchicalNamespaceSupport(adlfs_client)); - if (hns_support == HNSSupport::kContainerNotFound) { - return PathNotFound(location); - } - - if (location.path.empty()) { - auto container_client = - blob_service_client_->GetBlobContainerClient(location.container); - try { - auto response = container_client.Delete(); - if (response.Value.Deleted) { - return Status::OK(); - } else { - return StatusFromErrorResponse( - container_client.GetUrl(), *response.RawResponse, - "Failed to delete a container: " + location.container); - } - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus(exception, - "Failed to delete a container: ", location.container, - ": ", container_client.GetUrl()); - } - } - - if (hns_support == HNSSupport::kEnabled) { - auto directory_client = adlfs_client.GetDirectoryClient(location.path); - try { - auto response = directory_client.DeleteRecursive(); - if (response.Value.Deleted) { - return Status::OK(); - } else { - return StatusFromErrorResponse( - directory_client.GetUrl(), *response.RawResponse, - "Failed to delete a directory: " + location.path); - } - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus(exception, - "Failed to delete a directory: ", location.path, ": ", - directory_client.GetUrl()); + /// \pre location.container is not empty. + /// \pre location.path is not empty. + Status DeleteDirOnFileSystem(const DataLake::DataLakeFileSystemClient& adlfs_client, + const AzureLocation& location) { + DCHECK(!location.container.empty()); + DCHECK(!location.path.empty()); + auto directory_client = adlfs_client.GetDirectoryClient(location.path); + // XXX: should "directory not found" be considered an error? 
+ try { + auto response = directory_client.DeleteRecursive(); + if (response.Value.Deleted) { + return Status::OK(); + } else { + return StatusFromErrorResponse(directory_client.GetUrl(), *response.RawResponse, + "Failed to delete a directory: " + location.path); } - } else { - return DeleteDirContentsWithoutHierarchicalNamespace(location, - /*missing_dir_ok=*/true); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus(exception, "Failed to delete a directory: ", location.path, + ": ", directory_client.GetUrl()); } } - Status DeleteDirContents(const AzureLocation& location, bool missing_dir_ok) { - if (location.container.empty()) { - return internal::InvalidDeleteDirContents(location.all); - } - - auto adlfs_client = datalake_service_client_->GetFileSystemClient(location.container); - ARROW_ASSIGN_OR_RAISE(auto hns_support, HierarchicalNamespaceSupport(adlfs_client)); - if (hns_support == HNSSupport::kContainerNotFound) { - return missing_dir_ok ? Status::OK() : PathNotFound(location); - } - - if (hns_support == HNSSupport::kEnabled) { - auto directory_client = adlfs_client.GetDirectoryClient(location.path); - try { - auto list_response = directory_client.ListPaths(false); - for (; list_response.HasPage(); list_response.MoveToNextPage()) { - for (const auto& path : list_response.Paths) { - if (path.IsDirectory) { - auto sub_directory_client = adlfs_client.GetDirectoryClient(path.Name); - try { - sub_directory_client.DeleteRecursive(); - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus( - exception, "Failed to delete a sub directory: ", location.container, - kDelimiter, path.Name, ": ", sub_directory_client.GetUrl()); - } - } else { - auto sub_file_client = adlfs_client.GetFileClient(path.Name); - try { - sub_file_client.Delete(); - } catch (const Storage::StorageException& exception) { - return ExceptionToStatus( - exception, "Failed to delete a sub file: ", location.container, - kDelimiter, path.Name, ": ", sub_file_client.GetUrl()); - } + /// \pre location.container is not empty. 
+ Status DeleteDirContentsOnFileSystem( + const DataLake::DataLakeFileSystemClient& adlfs_client, + const AzureLocation& location, bool missing_dir_ok) { + auto directory_client = adlfs_client.GetDirectoryClient(location.path); + try { + auto list_response = directory_client.ListPaths(false); + for (; list_response.HasPage(); list_response.MoveToNextPage()) { + for (const auto& path : list_response.Paths) { + if (path.IsDirectory) { + auto sub_directory_client = adlfs_client.GetDirectoryClient(path.Name); + try { + sub_directory_client.DeleteRecursive(); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( + exception, "Failed to delete a sub directory: ", location.container, + kDelimiter, path.Name, ": ", sub_directory_client.GetUrl()); + } + } else { + auto sub_file_client = adlfs_client.GetFileClient(path.Name); + try { + sub_file_client.Delete(); + } catch (const Storage::StorageException& exception) { + return ExceptionToStatus( + exception, "Failed to delete a sub file: ", location.container, + kDelimiter, path.Name, ": ", sub_file_client.GetUrl()); } } } - } catch (const Storage::StorageException& exception) { - if (missing_dir_ok && exception.StatusCode == Http::HttpStatusCode::NotFound) { - return Status::OK(); - } else { - return ExceptionToStatus(exception, - "Failed to delete directory contents: ", location.path, - ": ", directory_client.GetUrl()); - } } return Status::OK(); - } else { - return DeleteDirContentsWithoutHierarchicalNamespace(location, missing_dir_ok); + } catch (const Storage::StorageException& exception) { + if (missing_dir_ok && exception.StatusCode == Http::HttpStatusCode::NotFound) { + return Status::OK(); + } + return ExceptionToStatus(exception, + "Failed to delete directory contents: ", location.path, + ": ", directory_client.GetUrl()); } } @@ -1640,7 +1736,30 @@ bool AzureFileSystem::Equals(const FileSystem& other) const { Result AzureFileSystem::GetFileInfo(const std::string& path) { ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(path)); - return impl_->GetFileInfo(location); + if (location.container.empty()) { + DCHECK(location.path.empty()); + // Root directory of the storage account. + return FileInfo{"", FileType::Directory}; + } + if (location.path.empty()) { + // We have a container, but no path within the container. + // The container itself represents a directory. + auto container_client = impl_->GetBlobContainerClient(location.container); + return GetContainerPropsAsFileInfo(location, container_client); + } + // There is a path to search within the container. Check HNS support to proceed. 
+ auto adlfs_client = impl_->GetFileSystemClient(location.container); + ARROW_ASSIGN_OR_RAISE(auto hns_support, + impl_->HierarchicalNamespaceSupport(adlfs_client)); + if (hns_support == HNSSupport::kContainerNotFound) { + return FileInfo{location.all, FileType::NotFound}; + } + if (hns_support == HNSSupport::kEnabled) { + return impl_->GetFileInfo(adlfs_client, location); + } + DCHECK_EQ(hns_support, HNSSupport::kDisabled); + auto container_client = impl_->GetBlobContainerClient(location.container); + return impl_->GetFileInfo(container_client, location); } Result AzureFileSystem::GetFileInfo(const FileSelector& select) { @@ -1654,21 +1773,95 @@ Result AzureFileSystem::GetFileInfo(const FileSelector& select) Status AzureFileSystem::CreateDir(const std::string& path, bool recursive) { ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(path)); - if (recursive) { - return impl_->CreateDirRecursive(location); - } else { - return impl_->CreateDir(location); + if (location.container.empty()) { + return Status::Invalid("CreateDir requires a non-empty path."); } + + auto container_client = impl_->GetBlobContainerClient(location.container); + if (location.path.empty()) { + // If the path is just the container, the parent (root) trivially exists, + // and the CreateDir operation comes down to just creating the container. + return CreateContainerIfNotExists(location.container, container_client); + } + + auto adlfs_client = impl_->GetFileSystemClient(location.container); + ARROW_ASSIGN_OR_RAISE(auto hns_support, + impl_->HierarchicalNamespaceSupport(adlfs_client)); + if (hns_support == HNSSupport::kContainerNotFound) { + if (!recursive) { + auto parent = location.parent(); + return PathNotFound(parent); + } + RETURN_NOT_OK(CreateContainerIfNotExists(location.container, container_client)); + // Perform a second check for HNS support after creating the container. + ARROW_ASSIGN_OR_RAISE(hns_support, impl_->HierarchicalNamespaceSupport(adlfs_client)); + if (hns_support == HNSSupport::kContainerNotFound) { + // We only get kContainerNotFound if we are unable to read the properties of the + // container we just created. This is very unlikely, but theoretically possible in + // a concurrent system, so the error is handled to avoid infinite recursion. + return Status::IOError("Unable to read properties of a newly created container: ", + location.container, ": " + container_client.GetUrl()); + } + } + // CreateDirOnFileSystem and CreateDirOnContainer can handle the container + // not existing which is useful and necessary here since the only reason + // a container was created above was to check for HNS support when it wasn't + // cached yet. 
+ if (hns_support == HNSSupport::kEnabled) { + return impl_->CreateDirOnFileSystem(adlfs_client, location, recursive); + } + DCHECK_EQ(hns_support, HNSSupport::kDisabled); + return impl_->CreateDirOnContainer(container_client, location, recursive); } Status AzureFileSystem::DeleteDir(const std::string& path) { ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(path)); - return impl_->DeleteDir(location); + if (location.container.empty()) { + return Status::Invalid("DeleteDir requires a non-empty path."); + } + if (location.path.empty()) { + auto container_client = impl_->GetBlobContainerClient(location.container); + return impl_->DeleteContainer(container_client, location); + } + + auto adlfs_client = impl_->GetFileSystemClient(location.container); + ARROW_ASSIGN_OR_RAISE(auto hns_support, + impl_->HierarchicalNamespaceSupport(adlfs_client)); + if (hns_support == HNSSupport::kContainerNotFound) { + return PathNotFound(location); + } + if (hns_support == HNSSupport::kEnabled) { + return impl_->DeleteDirOnFileSystem(adlfs_client, location); + } + DCHECK_EQ(hns_support, HNSSupport::kDisabled); + auto container_client = impl_->GetBlobContainerClient(location.container); + return impl_->DeleteDirContentsOnContainer(container_client, location, + /*require_dir_to_exist=*/true, + /*preserve_dir_marker_blob=*/false, + "DeleteDir"); } Status AzureFileSystem::DeleteDirContents(const std::string& path, bool missing_dir_ok) { ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(path)); - return impl_->DeleteDirContents(location, missing_dir_ok); + if (location.container.empty()) { + return internal::InvalidDeleteDirContents(location.all); + } + + auto adlfs_client = impl_->GetFileSystemClient(location.container); + ARROW_ASSIGN_OR_RAISE(auto hns_support, + impl_->HierarchicalNamespaceSupport(adlfs_client)); + if (hns_support == HNSSupport::kContainerNotFound) { + return missing_dir_ok ? Status::OK() : PathNotFound(location); + } + + if (hns_support == HNSSupport::kEnabled) { + return impl_->DeleteDirContentsOnFileSystem(adlfs_client, location, missing_dir_ok); + } + auto container_client = impl_->GetBlobContainerClient(location.container); + return impl_->DeleteDirContentsOnContainer(container_client, location, + /*require_dir_to_exist=*/!missing_dir_ok, + /*preserve_dir_marker_blob=*/true, + "DeleteDirContents"); } Status AzureFileSystem::DeleteRootDirContents() { diff --git a/cpp/src/arrow/filesystem/azurefs_internal.h b/cpp/src/arrow/filesystem/azurefs_internal.h index 13d84c9b542b4..5642e16bcfb05 100644 --- a/cpp/src/arrow/filesystem/azurefs_internal.h +++ b/cpp/src/arrow/filesystem/azurefs_internal.h @@ -71,7 +71,7 @@ enum class HierarchicalNamespaceSupport { /// \return kEnabled/kDisabled/kContainerNotFound (kUnknown is never /// returned). 
Result CheckIfHierarchicalNamespaceIsEnabled( - Azure::Storage::Files::DataLake::DataLakeFileSystemClient& adlfs_client, + const Azure::Storage::Files::DataLake::DataLakeFileSystemClient& adlfs_client, const arrow::fs::AzureOptions& options); } // namespace internal diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index f6af9f722dbac..ff94578b041dc 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -473,6 +473,14 @@ class TestAzureFileSystem : public ::testing::Test { return blob_client; } + Blobs::Models::BlobProperties GetBlobProperties(const std::string& container_name, + const std::string& blob_name) { + return blob_service_client_->GetBlobContainerClient(container_name) + .GetBlobClient(blob_name) + .GetProperties() + .Value; + } + void UploadLines(const std::vector& lines, const std::string& path, int total_size) { ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(path, {})); @@ -566,86 +574,259 @@ class TestAzureFileSystem : public ::testing::Test { return env->WithHierarchicalNamespace(); } + constexpr static const char* const kSubmitBatchBugMessage = + "This test is affected by an Azurite issue: " + "https://github.com/Azure/Azurite/pull/2302"; + + /// Azurite has a bug that causes BlobContainerClient::SubmitBatch to fail on macOS. + /// SubmitBatch is used by: + /// - AzureFileSystem::DeleteDir + /// - AzureFileSystem::DeleteDirContents + bool HasSubmitBatchBug() const { +#ifdef __APPLE__ + EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv()); + return env->backend() == AzureBackend::kAzurite; +#else + return false; +#endif + } + // Tests that are called from more than one implementation of TestAzureFileSystem void TestDetectHierarchicalNamespace(bool trip_up_azurite); void TestDetectHierarchicalNamespaceOnMissingContainer(); - void TestGetFileInfoObject(); + + void TestGetFileInfoOfRoot() { + AssertFileInfo(fs(), "", FileType::Directory); + + // URI + ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://")); + } + + void TestGetFileInfoOnExistingContainer() { + auto data = SetUpPreexistingData(); + AssertFileInfo(fs(), data.container_name, FileType::Directory); + AssertFileInfo(fs(), data.container_name + "/", FileType::Directory); + auto props = GetBlobProperties(data.container_name, data.kObjectName); + AssertFileInfo(fs(), data.ObjectPath(), FileType::File, + std::chrono::system_clock::time_point{props.LastModified}, + static_cast(props.BlobSize)); + AssertFileInfo(fs(), data.NotFoundObjectPath(), FileType::NotFound); + AssertFileInfo(fs(), data.ObjectPath() + "/", FileType::NotFound); + AssertFileInfo(fs(), data.NotFoundObjectPath() + "/", FileType::NotFound); + + // URIs + ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://" + data.container_name)); + ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://" + std::string{data.kObjectName})); + ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://" + data.ObjectPath())); + } + + void TestGetFileInfoOnMissingContainer() { + auto data = SetUpPreexistingData(); + AssertFileInfo(fs(), "nonexistent", FileType::NotFound); + AssertFileInfo(fs(), "nonexistent/object", FileType::NotFound); + AssertFileInfo(fs(), "nonexistent/object/", FileType::NotFound); + } + void TestGetFileInfoObjectWithNestedStructure(); + void TestCreateDirOnRoot() { + auto dir1 = PreexistingData::RandomContainerName(rng_); + auto dir2 = PreexistingData::RandomContainerName(rng_); + + AssertFileInfo(fs(), dir1, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(dir1, false)); + 
AssertFileInfo(fs(), dir1, FileType::Directory); + + AssertFileInfo(fs(), dir2, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(dir2, true)); + AssertFileInfo(fs(), dir1, FileType::Directory); + + // Should not fail if the directory already exists. + ASSERT_OK(fs()->CreateDir(dir1, false)); + ASSERT_OK(fs()->CreateDir(dir1, true)); + AssertFileInfo(fs(), dir1, FileType::Directory); + } + + void TestCreateDirOnExistingContainer() { + auto data = SetUpPreexistingData(); + auto dir1 = data.RandomDirectoryPath(rng_); + auto dir2 = data.RandomDirectoryPath(rng_); + + AssertFileInfo(fs(), dir1, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(dir1, /*recursive=*/false)); + AssertFileInfo(fs(), dir1, FileType::Directory); + + AssertFileInfo(fs(), dir2, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(dir2, /*recursive=*/true)); + AssertFileInfo(fs(), dir2, FileType::Directory); + + auto subdir1 = ConcatAbstractPath(dir1, "subdir"); + auto subdir2 = ConcatAbstractPath(dir2, "subdir"); + AssertFileInfo(fs(), subdir1, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(subdir1, /*recursive=*/false)); + AssertFileInfo(fs(), subdir1, FileType::Directory); + AssertFileInfo(fs(), subdir2, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(subdir2, /*recursive=*/true)); + AssertFileInfo(fs(), subdir2, FileType::Directory); + + auto dir3 = data.RandomDirectoryPath(rng_); + AssertFileInfo(fs(), dir3, FileType::NotFound); + auto subdir3 = ConcatAbstractPath(dir3, "subdir"); + AssertFileInfo(fs(), subdir3, FileType::NotFound); + // Creating subdir3 with recursive=false should fail. + EXPECT_RAISES_WITH_MESSAGE_THAT( + IOError, ::testing::HasSubstr("Path does not exist '" + dir3 + "'"), + fs()->CreateDir(subdir3, /*recursive=*/false)); + AssertFileInfo(fs(), dir3, FileType::NotFound); + AssertFileInfo(fs(), subdir3, FileType::NotFound); + // Creating subdir3 with recursive=true should work. + ASSERT_OK(fs()->CreateDir(subdir3, /*recursive=*/true)); + AssertFileInfo(fs(), dir3, FileType::Directory); + AssertFileInfo(fs(), subdir3, FileType::Directory); + + auto dir4 = data.RandomDirectoryPath(rng_); + auto subdir4 = ConcatAbstractPath(dir4, "subdir4"); + auto subdir5 = ConcatAbstractPath(dir4, "subdir4/subdir5"); + // Creating subdir4 with recursive=false should fail. + EXPECT_RAISES_WITH_MESSAGE_THAT( + IOError, ::testing::HasSubstr("Path does not exist '" + dir4 + "'"), + fs()->CreateDir(subdir4, /*recursive=*/false)); + AssertFileInfo(fs(), dir4, FileType::NotFound); + AssertFileInfo(fs(), subdir4, FileType::NotFound); + // Creating subdir5 with recursive=false should fail. + EXPECT_RAISES_WITH_MESSAGE_THAT( + IOError, ::testing::HasSubstr("Path does not exist '" + subdir4 + "'"), + fs()->CreateDir(subdir5, /*recursive=*/false)); + AssertFileInfo(fs(), dir4, FileType::NotFound); + AssertFileInfo(fs(), subdir4, FileType::NotFound); + AssertFileInfo(fs(), subdir5, FileType::NotFound); + // Creating subdir5 with recursive=true should work. 
+ ASSERT_OK(fs()->CreateDir(subdir5, /*recursive=*/true)); + AssertFileInfo(fs(), dir4, FileType::Directory); + AssertFileInfo(fs(), subdir4, FileType::Directory); + AssertFileInfo(fs(), subdir5, FileType::Directory); + } + + void TestCreateDirOnMissingContainer() { + auto container1 = PreexistingData::RandomContainerName(rng_); + auto container2 = PreexistingData::RandomContainerName(rng_); + AssertFileInfo(fs(), container1, FileType::NotFound); + AssertFileInfo(fs(), container2, FileType::NotFound); + + auto dir1 = ConcatAbstractPath(container1, "dir"); + AssertFileInfo(fs(), dir1, FileType::NotFound); + // Creating dir1 with recursive=false should fail. + EXPECT_RAISES_WITH_MESSAGE_THAT( + IOError, ::testing::HasSubstr("Path does not exist '" + container1 + "'"), + fs()->CreateDir(dir1, /*recursive=*/false)); + AssertFileInfo(fs(), container1, FileType::NotFound); + AssertFileInfo(fs(), dir1, FileType::NotFound); + // Creating dir1 with recursive=true should work. + ASSERT_OK(fs()->CreateDir(dir1, /*recursive=*/true)); + AssertFileInfo(fs(), container1, FileType::Directory); + AssertFileInfo(fs(), dir1, FileType::Directory); + + auto dir2 = ConcatAbstractPath(container2, "dir"); + auto subdir2 = ConcatAbstractPath(dir2, "subdir2"); + auto subdir3 = ConcatAbstractPath(dir2, "subdir2/subdir3"); + // Creating dir2 with recursive=false should fail. + EXPECT_RAISES_WITH_MESSAGE_THAT( + IOError, ::testing::HasSubstr("Path does not exist '" + container2 + "'"), + fs()->CreateDir(dir2, /*recursive=*/false)); + AssertFileInfo(fs(), container2, FileType::NotFound); + AssertFileInfo(fs(), dir2, FileType::NotFound); + // Creating subdir2 with recursive=false should fail. + EXPECT_RAISES_WITH_MESSAGE_THAT( + IOError, ::testing::HasSubstr("Path does not exist '" + dir2 + "'"), + fs()->CreateDir(subdir2, /*recursive=*/false)); + AssertFileInfo(fs(), container2, FileType::NotFound); + AssertFileInfo(fs(), dir2, FileType::NotFound); + AssertFileInfo(fs(), subdir2, FileType::NotFound); + // Creating subdir3 with recursive=false should fail. + EXPECT_RAISES_WITH_MESSAGE_THAT( + IOError, ::testing::HasSubstr("Path does not exist '" + subdir2 + "'"), + fs()->CreateDir(subdir3, /*recursive=*/false)); + AssertFileInfo(fs(), container2, FileType::NotFound); + AssertFileInfo(fs(), dir2, FileType::NotFound); + AssertFileInfo(fs(), subdir2, FileType::NotFound); + AssertFileInfo(fs(), subdir3, FileType::NotFound); + // Creating subdir3 with recursive=true should work. + ASSERT_OK(fs()->CreateDir(subdir3, /*recursive=*/true)); + AssertFileInfo(fs(), container2, FileType::Directory); + AssertFileInfo(fs(), dir2, FileType::Directory); + AssertFileInfo(fs(), subdir2, FileType::Directory); + AssertFileInfo(fs(), subdir3, FileType::Directory); + } + void TestDeleteDirSuccessEmpty() { + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); - if (WithHierarchicalNamespace()) { - ASSERT_OK(fs()->CreateDir(directory_path, true)); - AssertFileInfo(fs(), directory_path, FileType::Directory); - ASSERT_OK(fs()->DeleteDir(directory_path)); - AssertFileInfo(fs(), directory_path, FileType::NotFound); - } else { - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() and DeleteDir() do nothing. 
- ASSERT_OK(fs()->CreateDir(directory_path)); - AssertFileInfo(fs(), directory_path, FileType::NotFound); - ASSERT_OK(fs()->DeleteDir(directory_path)); - AssertFileInfo(fs(), directory_path, FileType::NotFound); - } + AssertFileInfo(fs(), directory_path, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(directory_path, true)); + AssertFileInfo(fs(), directory_path, FileType::Directory); + ASSERT_OK(fs()->DeleteDir(directory_path)); + AssertFileInfo(fs(), directory_path, FileType::NotFound); } - void TestCreateDirSuccessContainerAndDirectory() { + void TestDeleteDirFailureNonexistent() { auto data = SetUpPreexistingData(); const auto path = data.RandomDirectoryPath(rng_); - ASSERT_OK(fs()->CreateDir(path, false)); - if (WithHierarchicalNamespace()) { - AssertFileInfo(fs(), path, FileType::Directory); - } else { - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. - AssertFileInfo(fs(), path, FileType::NotFound); - } + ASSERT_RAISES(IOError, fs()->DeleteDir(path)); } - void TestCreateDirRecursiveSuccessContainerOnly() { - auto container_name = PreexistingData::RandomContainerName(rng_); - ASSERT_OK(fs()->CreateDir(container_name, true)); - AssertFileInfo(fs(), container_name, FileType::Directory); + void TestDeleteDirSuccessHaveBlob() { + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } + auto data = SetUpPreexistingData(); + const auto directory_path = data.RandomDirectoryPath(rng_); + const auto blob_path = ConcatAbstractPath(directory_path, "hello.txt"); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(blob_path)); + ASSERT_OK(output->Write("hello")); + ASSERT_OK(output->Close()); + AssertFileInfo(fs(), blob_path, FileType::File); + ASSERT_OK(fs()->DeleteDir(directory_path)); + AssertFileInfo(fs(), blob_path, FileType::NotFound); } - void TestCreateDirRecursiveSuccessDirectoryOnly() { + void TestDeleteDirSuccessHaveDirectory() { + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } auto data = SetUpPreexistingData(); const auto parent = data.RandomDirectoryPath(rng_); const auto path = ConcatAbstractPath(parent, "new-sub"); ASSERT_OK(fs()->CreateDir(path, true)); - if (WithHierarchicalNamespace()) { - AssertFileInfo(fs(), path, FileType::Directory); - AssertFileInfo(fs(), parent, FileType::Directory); - } else { - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. - AssertFileInfo(fs(), path, FileType::NotFound); - AssertFileInfo(fs(), parent, FileType::NotFound); - } + AssertFileInfo(fs(), path, FileType::Directory); + AssertFileInfo(fs(), parent, FileType::Directory); + ASSERT_OK(fs()->DeleteDir(parent)); + AssertFileInfo(fs(), path, FileType::NotFound); + AssertFileInfo(fs(), parent, FileType::NotFound); } - void TestCreateDirRecursiveSuccessContainerAndDirectory() { - auto data = SetUpPreexistingData(); - const auto parent = data.RandomDirectoryPath(rng_); - const auto path = ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs()->CreateDir(path, true)); - if (WithHierarchicalNamespace()) { - AssertFileInfo(fs(), path, FileType::Directory); - AssertFileInfo(fs(), parent, FileType::Directory); - AssertFileInfo(fs(), data.container_name, FileType::Directory); - } else { - // There is only virtual directory without hierarchical namespace - // support. So the CreateDir() does nothing. 
- AssertFileInfo(fs(), path, FileType::NotFound); - AssertFileInfo(fs(), parent, FileType::NotFound); - AssertFileInfo(fs(), data.container_name, FileType::Directory); + void TestDeleteDirContentsSuccessExist() { + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } + auto preexisting_data = SetUpPreexistingData(); + HierarchicalPaths paths; + CreateHierarchicalData(&paths); + ASSERT_OK(fs()->DeleteDirContents(paths.directory)); + AssertFileInfo(fs(), paths.directory, FileType::Directory); + for (const auto& sub_path : paths.sub_paths) { + AssertFileInfo(fs(), sub_path, FileType::NotFound); } } void TestDeleteDirContentsSuccessNonexistent() { + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); ASSERT_OK(fs()->DeleteDirContents(directory_path, true)); @@ -662,7 +843,7 @@ class TestAzureFileSystem : public ::testing::Test { void TestAzureFileSystem::TestDetectHierarchicalNamespace(bool trip_up_azurite) { EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv()); if (trip_up_azurite && env->backend() != AzureBackend::kAzurite) { - GTEST_SKIP() << "trip_up_azurite=true is only for Azurite."; + return; } auto data = SetUpPreexistingData(); @@ -704,22 +885,6 @@ void TestAzureFileSystem::TestDetectHierarchicalNamespaceOnMissingContainer() { } } -void TestAzureFileSystem::TestGetFileInfoObject() { - auto data = SetUpPreexistingData(); - auto object_properties = - blob_service_client_->GetBlobContainerClient(data.container_name) - .GetBlobClient(data.kObjectName) - .GetProperties() - .Value; - - AssertFileInfo(fs(), data.ObjectPath(), FileType::File, - std::chrono::system_clock::time_point{object_properties.LastModified}, - static_cast(object_properties.BlobSize)); - - // URI - ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://" + std::string{data.kObjectName})); -} - void TestAzureFileSystem::TestGetFileInfoObjectWithNestedStructure() { auto data = SetUpPreexistingData(); // Adds detailed tests to handle cases of different edge cases @@ -855,6 +1020,16 @@ TYPED_TEST(TestAzureFileSystemOnAllEnvs, DetectHierarchicalNamespaceOnMissingCon this->TestDetectHierarchicalNamespaceOnMissingContainer(); } +TYPED_TEST(TestAzureFileSystemOnAllEnvs, GetFileInfoOfRoot) { + this->TestGetFileInfoOfRoot(); +} + +TYPED_TEST(TestAzureFileSystemOnAllEnvs, CreateDirWithEmptyPath) { + ASSERT_RAISES(Invalid, this->fs()->CreateDir("", false)); +} + +TYPED_TEST(TestAzureFileSystemOnAllEnvs, CreateDirOnRoot) { this->TestCreateDirOnRoot(); } + // Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/ HNS) // combined with the two scenarios for AzureFileSystem::cached_hns_support_ -- unknown and // known according to the environment. 
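Note: the scenario matrix described in the comment above is built with GoogleTest's typed-test machinery (::testing::Types plus TYPED_TEST_SUITE), as the hunk below shows. For reference, a minimal self-contained sketch of that pattern follows; the fixture name and the ScenarioA/B/C tag types are placeholders invented for illustration, not the actual entries of AllScenarios.

#include <gtest/gtest.h>

// Placeholder scenario tags standing in for the real environment/cached-HNS
// combinations enumerated by AllScenarios in azurefs_test.cc.
struct ScenarioA {};
struct ScenarioB {};
struct ScenarioC {};

// Typed fixture: GoogleTest instantiates it once per type in the list.
template <class Scenario>
class ScenarioMatrixSketch : public ::testing::Test {};

using SketchScenarios = ::testing::Types<ScenarioA, ScenarioB, ScenarioC>;
TYPED_TEST_SUITE(ScenarioMatrixSketch, SketchScenarios);

// Each TYPED_TEST body runs once per scenario type, which is how a single
// test body can cover the whole Azurite/flat/HNS matrix.
TYPED_TEST(ScenarioMatrixSketch, RunsOncePerScenario) { SUCCEED(); }

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}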
@@ -869,105 +1044,56 @@ using AllScenarios = ::testing::Types< TYPED_TEST_SUITE(TestAzureFileSystemOnAllScenarios, AllScenarios); -TYPED_TEST(TestAzureFileSystemOnAllScenarios, GetFileInfoObject) { - this->TestGetFileInfoObject(); +TYPED_TEST(TestAzureFileSystemOnAllScenarios, GetFileInfoOnExistingContainer) { + this->TestGetFileInfoOnExistingContainer(); } -TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirSuccessEmpty) { - this->TestDeleteDirSuccessEmpty(); +TYPED_TEST(TestAzureFileSystemOnAllScenarios, GetFileInfoOnMissingContainer) { + this->TestGetFileInfoOnMissingContainer(); } TYPED_TEST(TestAzureFileSystemOnAllScenarios, GetFileInfoObjectWithNestedStructure) { this->TestGetFileInfoObjectWithNestedStructure(); } -TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirSuccessContainerAndDirectory) { - this->TestCreateDirSuccessContainerAndDirectory(); +TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirOnExistingContainer) { + this->TestCreateDirOnExistingContainer(); } -TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirRecursiveSuccessContainerOnly) { - this->TestCreateDirRecursiveSuccessContainerOnly(); +TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirOnMissingContainer) { + this->TestCreateDirOnMissingContainer(); } -TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirRecursiveSuccessDirectoryOnly) { - this->TestCreateDirRecursiveSuccessDirectoryOnly(); +TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirSuccessEmpty) { + this->TestDeleteDirSuccessEmpty(); } -TYPED_TEST(TestAzureFileSystemOnAllScenarios, - CreateDirRecursiveSuccessContainerAndDirectory) { - this->TestCreateDirRecursiveSuccessContainerAndDirectory(); +TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirFailureNonexistent) { + this->TestDeleteDirFailureNonexistent(); } -// Tests using a real storage account *with Hierarchical Namespace enabled* - -TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirFailureNonexistent) { - auto data = SetUpPreexistingData(); - const auto path = data.RandomDirectoryPath(rng_); - ASSERT_RAISES(IOError, fs()->DeleteDir(path)); +TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirSuccessHaveBlob) { + this->TestDeleteDirSuccessHaveBlob(); } -TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveBlob) { - auto data = SetUpPreexistingData(); - const auto directory_path = data.RandomDirectoryPath(rng_); - const auto blob_path = ConcatAbstractPath(directory_path, "hello.txt"); - ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(blob_path)); - ASSERT_OK(output->Write(std::string_view("hello"))); - ASSERT_OK(output->Close()); - AssertFileInfo(fs(), blob_path, FileType::File); - ASSERT_OK(fs()->DeleteDir(directory_path)); - AssertFileInfo(fs(), blob_path, FileType::NotFound); +TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirSuccessHaveDirectory) { + this->TestDeleteDirSuccessHaveDirectory(); } -TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveDirectory) { - auto data = SetUpPreexistingData(); - const auto parent = data.RandomDirectoryPath(rng_); - const auto path = ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs()->CreateDir(path, true)); - AssertFileInfo(fs(), path, FileType::Directory); - AssertFileInfo(fs(), parent, FileType::Directory); - ASSERT_OK(fs()->DeleteDir(parent)); - AssertFileInfo(fs(), path, FileType::NotFound); - AssertFileInfo(fs(), parent, FileType::NotFound); -} - -TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsSuccessExist) { - auto preexisting_data = SetUpPreexistingData(); - 
HierarchicalPaths paths; - CreateHierarchicalData(&paths); - ASSERT_OK(fs()->DeleteDirContents(paths.directory)); - AssertFileInfo(fs(), paths.directory, FileType::Directory); - for (const auto& sub_path : paths.sub_paths) { - AssertFileInfo(fs(), sub_path, FileType::NotFound); - } +TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirContentsSuccessExist) { + this->TestDeleteDirContentsSuccessExist(); } -TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsSuccessNonexistent) { +TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirContentsSuccessNonexistent) { this->TestDeleteDirContentsSuccessNonexistent(); } -TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsFailureNonexistent) { +TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirContentsFailureNonexistent) { this->TestDeleteDirContentsFailureNonexistent(); } // Tests using Azurite (the local Azure emulator) -TEST_F(TestAzuriteFileSystem, GetFileInfoAccount) { - AssertFileInfo(fs(), "", FileType::Directory); - - // URI - ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://")); -} - -TEST_F(TestAzuriteFileSystem, GetFileInfoContainer) { - auto data = SetUpPreexistingData(); - AssertFileInfo(fs(), data.container_name, FileType::Directory); - - AssertFileInfo(fs(), "nonexistent-container", FileType::NotFound); - - // URI - ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://" + data.container_name)); -} - TEST_F(TestAzuriteFileSystem, GetFileInfoSelector) { SetUpSmallFileSystemTree(); @@ -1141,16 +1267,6 @@ TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorExplicitImplicitDirDedup) { AssertFileInfo(infos[0], "container/mydir/nonemptydir2/somefile", FileType::File); } -TEST_F(TestAzuriteFileSystem, CreateDirFailureNoContainer) { - ASSERT_RAISES(Invalid, fs()->CreateDir("", false)); -} - -TEST_F(TestAzuriteFileSystem, CreateDirSuccessContainerOnly) { - auto container_name = PreexistingData::RandomContainerName(rng_); - ASSERT_OK(fs()->CreateDir(container_name, false)); - AssertFileInfo(fs(), container_name, FileType::Directory); -} - TEST_F(TestAzuriteFileSystem, CreateDirFailureDirectoryWithMissingContainer) { const auto path = std::string("not-a-container/new-directory"); ASSERT_RAISES(IOError, fs()->CreateDir(path, false)); @@ -1175,19 +1291,20 @@ TEST_F(TestAzuriteFileSystem, DeleteDirSuccessContainer) { } TEST_F(TestAzuriteFileSystem, DeleteDirSuccessNonexistent) { + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); - // There is only virtual directory without hierarchical namespace - // support. So the DeleteDir() for nonexistent directory does nothing. - ASSERT_OK(fs()->DeleteDir(directory_path)); + // DeleteDir() fails if the directory doesn't exist. + ASSERT_RAISES(IOError, fs()->DeleteDir(directory_path)); AssertFileInfo(fs(), directory_path, FileType::NotFound); } TEST_F(TestAzuriteFileSystem, DeleteDirSuccessHaveBlobs) { -#ifdef __APPLE__ - GTEST_SKIP() << "This test fails by an Azurite problem: " - "https://github.com/Azure/Azurite/pull/2302"; -#endif + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); // We must use 257 or more blobs here to test pagination of ListBlobs(). 
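Note: the "257 or more" figure follows from the batching constant used by DeleteDirContentsOnContainer above: ListBlobs pages are capped at kNumMaxRequestsInBatch (256) entries, so at least 257 blobs are needed before MoveToNextPage() and a second delete batch are exercised. A small sketch of that arithmetic is below; NumDeletePages is a hypothetical helper written for illustration, not part of the codebase.

#include <cstdint>
#include <iostream>

// Mirrors the page/batch cap used by DeleteDirContentsOnContainer.
constexpr int64_t kNumMaxRequestsInBatch = 256;

// How many ListBlobs pages (and hence delete batches) `num_blobs` blobs need
// when every page is capped at kNumMaxRequestsInBatch entries.
constexpr int64_t NumDeletePages(int64_t num_blobs) {
  return (num_blobs + kNumMaxRequestsInBatch - 1) / kNumMaxRequestsInBatch;
}

static_assert(NumDeletePages(256) == 1, "256 blobs fit in a single page");
static_assert(NumDeletePages(257) == 2, "257 blobs force a second page");

int main() {
  std::cout << NumDeletePages(300) << std::endl;  // prints 2
  return 0;
}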
@@ -1213,10 +1330,9 @@ TEST_F(TestAzuriteFileSystem, DeleteDirUri) { } TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessContainer) { -#ifdef __APPLE__ - GTEST_SKIP() << "This test fails by an Azurite problem: " - "https://github.com/Azure/Azurite/pull/2302"; -#endif + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } auto data = SetUpPreexistingData(); HierarchicalPaths paths; CreateHierarchicalData(&paths); @@ -1229,16 +1345,14 @@ TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessContainer) { } TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessDirectory) { -#ifdef __APPLE__ - GTEST_SKIP() << "This test fails by an Azurite problem: " - "https://github.com/Azure/Azurite/pull/2302"; -#endif + if (HasSubmitBatchBug()) { + GTEST_SKIP() << kSubmitBatchBugMessage; + } auto data = SetUpPreexistingData(); HierarchicalPaths paths; CreateHierarchicalData(&paths); ASSERT_OK(fs()->DeleteDirContents(paths.directory)); - // GH-38772: We may change this to FileType::Directory. - AssertFileInfo(fs(), paths.directory, FileType::NotFound); + AssertFileInfo(fs(), paths.directory, FileType::Directory); for (const auto& sub_path : paths.sub_paths) { AssertFileInfo(fs(), sub_path, FileType::NotFound); } From 01deb9438acde11f1968acd2a0bb5d3e8e4a4cc6 Mon Sep 17 00:00:00 2001 From: mwish Date: Fri, 5 Jan 2024 23:44:52 +0800 Subject: [PATCH 136/570] GH-39419: [C++][Parquet] Style: Using arrow::Buffer data_as api rather than reinterpret_cast (#39420) ### Rationale for this change This patch using `{mutable}_data_as()` api to replace `interpret_cast<{const} T*>`. It's just a style fixing. ### What changes are included in this PR? Just api replacement for `::arrow::Buffer` * `reinterpret_cast` -> `mutable_data_as()` * `reinterpret_cast` -> `data_as()` Also, for `auto {variable_name} = reinterpret_cast<{mutable} T*>( ... )`, I changed it to: 1. `const auto*` for `data_as()`. 2. `auto*` for `mutable_data_as()` This didn't change the syntax, but make it more readable. ### Are these changes tested? No need ### Are there any user-facing changes? 
no * Closes: #39419 * Authored-by: mwish Signed-off-by: mwish --- cpp/src/parquet/encoding.cc | 74 +++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 41 deletions(-) diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 840efa12cc3c1..b07ad6c9fb062 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -125,7 +125,7 @@ class PlainEncoder : public EncoderImpl, virtual public TypedEncoder { if (valid_bits != NULLPTR) { PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T), this->memory_pool())); - T* data = reinterpret_cast(buffer->mutable_data()); + T* data = buffer->template mutable_data_as(); int num_valid_values = ::arrow::util::internal::SpacedCompress( src, num_values, valid_bits, valid_bits_offset, data); Put(data, num_valid_values); @@ -323,7 +323,7 @@ class PlainEncoder : public EncoderImpl, virtual public BooleanEnco if (valid_bits != NULLPTR) { PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T), this->memory_pool())); - T* data = reinterpret_cast(buffer->mutable_data()); + T* data = buffer->mutable_data_as(); int num_valid_values = ::arrow::util::internal::SpacedCompress( src, num_values, valid_bits, valid_bits_offset, data); Put(data, num_valid_values); @@ -882,7 +882,7 @@ void ByteStreamSplitEncoder::PutSpaced(const T* src, int num_values, if (valid_bits != NULLPTR) { PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T), this->memory_pool())); - T* data = reinterpret_cast(buffer->mutable_data()); + T* data = buffer->template mutable_data_as(); int num_valid_values = ::arrow::util::internal::SpacedCompress( src, num_values, valid_bits, valid_bits_offset, data); Put(data, num_valid_values); @@ -1080,7 +1080,7 @@ inline int DecodePlain(const uint8_t* data, int64_t data_size ParquetException::EofException(); } for (int i = 0; i < num_values; ++i) { - out[i].ptr = data + i * type_length; + out[i].ptr = data + i * static_cast(type_length); } return static_cast(bytes_to_decode); } @@ -1537,9 +1537,8 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder { int Decode(T* buffer, int num_values) override { num_values = std::min(num_values, num_values_); - int decoded_values = - idx_decoder_.GetBatchWithDict(reinterpret_cast(dictionary_->data()), - dictionary_length_, buffer, num_values); + int decoded_values = idx_decoder_.GetBatchWithDict( + dictionary_->data_as(), dictionary_length_, buffer, num_values); if (decoded_values != num_values) { ParquetException::EofException(); } @@ -1551,9 +1550,8 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder { int64_t valid_bits_offset) override { num_values = std::min(num_values, num_values_); if (num_values != idx_decoder_.GetBatchWithDictSpaced( - reinterpret_cast(dictionary_->data()), - dictionary_length_, buffer, num_values, null_count, valid_bits, - valid_bits_offset)) { + dictionary_->data_as(), dictionary_length_, buffer, + num_values, null_count, valid_bits, valid_bits_offset)) { ParquetException::EofException(); } num_values_ -= num_values; @@ -1580,8 +1578,7 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder { num_values, /*shrink_to_fit=*/false)); } - auto indices_buffer = - reinterpret_cast(indices_scratch_space_->mutable_data()); + auto indices_buffer = indices_scratch_space_->mutable_data_as(); if (num_values != idx_decoder_.GetBatchSpaced(num_values, null_count, valid_bits, valid_bits_offset, indices_buffer)) { @@ 
-1611,8 +1608,7 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder { PARQUET_THROW_NOT_OK(indices_scratch_space_->TypedResize( num_values, /*shrink_to_fit=*/false)); } - auto indices_buffer = - reinterpret_cast(indices_scratch_space_->mutable_data()); + auto indices_buffer = indices_scratch_space_->mutable_data_as(); if (num_values != idx_decoder_.GetBatch(indices_buffer, num_values)) { ParquetException::EofException(); } @@ -1632,7 +1628,7 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder { void GetDictionary(const T** dictionary, int32_t* dictionary_length) override { *dictionary_length = dictionary_length_; - *dictionary = reinterpret_cast(dictionary_->mutable_data()); + *dictionary = dictionary_->mutable_data_as(); } protected: @@ -1647,8 +1643,7 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder { dictionary_length_ = static_cast(dictionary->values_left()); PARQUET_THROW_NOT_OK(dictionary_->Resize(dictionary_length_ * sizeof(T), /*shrink_to_fit=*/false)); - dictionary->Decode(reinterpret_cast(dictionary_->mutable_data()), - dictionary_length_); + dictionary->Decode(dictionary_->mutable_data_as(), dictionary_length_); } // Only one is set. @@ -1688,7 +1683,7 @@ template <> void DictDecoderImpl::SetDict(TypedDecoder* dictionary) { DecodeDict(dictionary); - auto dict_values = reinterpret_cast(dictionary_->mutable_data()); + auto* dict_values = dictionary_->mutable_data_as(); int total_size = 0; for (int i = 0; i < dictionary_length_; ++i) { @@ -1702,8 +1697,7 @@ void DictDecoderImpl::SetDict(TypedDecoder* dictio int32_t offset = 0; uint8_t* bytes_data = byte_array_data_->mutable_data(); - int32_t* bytes_offsets = - reinterpret_cast(byte_array_offsets_->mutable_data()); + int32_t* bytes_offsets = byte_array_offsets_->mutable_data_as(); for (int i = 0; i < dictionary_length_; ++i) { memcpy(bytes_data + offset, dict_values[i].ptr, dict_values[i].len); bytes_offsets[i] = offset; @@ -1717,7 +1711,7 @@ template <> inline void DictDecoderImpl::SetDict(TypedDecoder* dictionary) { DecodeDict(dictionary); - auto dict_values = reinterpret_cast(dictionary_->mutable_data()); + auto* dict_values = dictionary_->mutable_data_as(); int fixed_len = descr_->type_length(); int total_size = dictionary_length_ * fixed_len; @@ -1765,7 +1759,7 @@ int DictDecoderImpl::DecodeArrow( typename EncodingTraits::DictAccumulator* builder) { PARQUET_THROW_NOT_OK(builder->Reserve(num_values)); - auto dict_values = reinterpret_cast(dictionary_->data()); + const auto* dict_values = dictionary_->data_as(); VisitNullBitmapInline( valid_bits, valid_bits_offset, num_values, null_count, @@ -1801,7 +1795,7 @@ inline int DictDecoderImpl::DecodeArrow( PARQUET_THROW_NOT_OK(builder->Reserve(num_values)); - auto dict_values = reinterpret_cast(dictionary_->data()); + const auto* dict_values = dictionary_->data_as(); VisitNullBitmapInline( valid_bits, valid_bits_offset, num_values, null_count, @@ -1834,7 +1828,7 @@ int DictDecoderImpl::DecodeArrow( PARQUET_THROW_NOT_OK(builder->Reserve(num_values)); - auto dict_values = reinterpret_cast(dictionary_->data()); + const auto* dict_values = dictionary_->data_as(); VisitNullBitmapInline( valid_bits, valid_bits_offset, num_values, null_count, @@ -1858,7 +1852,7 @@ int DictDecoderImpl::DecodeArrow( PARQUET_THROW_NOT_OK(builder->Reserve(num_values)); using value_type = typename Type::c_type; - auto dict_values = reinterpret_cast(dictionary_->data()); + const auto* dict_values = dictionary_->data_as(); VisitNullBitmapInline( 
valid_bits, valid_bits_offset, num_values, null_count, @@ -1936,7 +1930,7 @@ class DictByteArrayDecoderImpl : public DictDecoderImpl, // space for binary data. RETURN_NOT_OK(helper.Prepare()); - auto dict_values = reinterpret_cast(dictionary_->data()); + const auto* dict_values = dictionary_->data_as(); int values_decoded = 0; int num_indices = 0; int pos_indices = 0; @@ -2007,7 +2001,7 @@ class DictByteArrayDecoderImpl : public DictDecoderImpl, // space for binary data. RETURN_NOT_OK(helper.Prepare()); - auto dict_values = reinterpret_cast(dictionary_->data()); + const auto* dict_values = dictionary_->data_as(); while (values_decoded < num_values) { const int32_t batch_size = @@ -2037,7 +2031,7 @@ class DictByteArrayDecoderImpl : public DictDecoderImpl, RETURN_NOT_OK(builder->Reserve(num_values)); ::arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values); - auto dict_values = reinterpret_cast(dictionary_->data()); + const auto* dict_values = dictionary_->data_as(); int values_decoded = 0; int num_appended = 0; @@ -2090,7 +2084,7 @@ class DictByteArrayDecoderImpl : public DictDecoderImpl, RETURN_NOT_OK(builder->Reserve(num_values)); - auto dict_values = reinterpret_cast(dictionary_->data()); + const auto* dict_values = dictionary_->data_as(); int values_decoded = 0; while (values_decoded < num_values) { @@ -2388,7 +2382,7 @@ void DeltaBitPackEncoder::PutSpaced(const T* src, int num_values, if (valid_bits != NULLPTR) { PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T), this->memory_pool())); - T* data = reinterpret_cast(buffer->mutable_data()); + T* data = buffer->template mutable_data_as(); int num_valid_values = ::arrow::util::internal::SpacedCompress( src, num_values, valid_bits, valid_bits_offset, data); Put(data, num_valid_values); @@ -2734,7 +2728,7 @@ void DeltaLengthByteArrayEncoder::PutSpaced(const T* src, int num_values, if (valid_bits != NULLPTR) { PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T), this->memory_pool())); - T* data = reinterpret_cast(buffer->mutable_data()); + T* data = buffer->template mutable_data_as(); int num_valid_values = ::arrow::util::internal::SpacedCompress( src, num_values, valid_bits, valid_bits_offset, data); Put(data, num_valid_values); @@ -2789,8 +2783,7 @@ class DeltaLengthByteArrayDecoder : public DecoderImpl, } int32_t data_size = 0; - const int32_t* length_ptr = - reinterpret_cast(buffered_length_->data()) + length_idx_; + const int32_t* length_ptr = buffered_length_->data_as() + length_idx_; int bytes_offset = len_ - decoder_->bytes_left(); for (int i = 0; i < max_values; ++i) { int32_t len = length_ptr[i]; @@ -2844,8 +2837,8 @@ class DeltaLengthByteArrayDecoder : public DecoderImpl, // call len_decoder_.Decode to decode all the lengths. // all the lengths are buffered in buffered_length_. 
- int ret = len_decoder_.Decode( - reinterpret_cast(buffered_length_->mutable_data()), num_length); + int ret = + len_decoder_.Decode(buffered_length_->mutable_data_as(), num_length); DCHECK_EQ(ret, num_length); length_idx_ = 0; num_valid_values_ = num_length; @@ -2938,7 +2931,7 @@ class RleBooleanEncoder final : public EncoderImpl, virtual public BooleanEncode if (valid_bits != NULLPTR) { PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T), this->memory_pool())); - T* data = reinterpret_cast(buffer->mutable_data()); + T* data = buffer->mutable_data_as(); int num_valid_values = ::arrow::util::internal::SpacedCompress( src, num_values, valid_bits, valid_bits_offset, data); Put(data, num_valid_values); @@ -3136,7 +3129,7 @@ class DeltaByteArrayEncoder : public EncoderImpl, virtual public TypedEncoder
    Resize(num_values * sizeof(T), false)); } - T* data = reinterpret_cast(buffer_->mutable_data()); + T* data = buffer_->mutable_data_as(); int num_valid_values = ::arrow::util::internal::SpacedCompress( src, num_values, valid_bits, valid_bits_offset, data); Put(data, num_valid_values); @@ -3338,7 +3331,7 @@ class DeltaByteArrayDecoderImpl : public DecoderImpl, virtual public TypedDecode // all the prefix lengths are buffered in buffered_prefix_length_. PARQUET_THROW_NOT_OK(buffered_prefix_length_->Resize(num_prefix * sizeof(int32_t))); int ret = prefix_len_decoder_.Decode( - reinterpret_cast(buffered_prefix_length_->mutable_data()), num_prefix); + buffered_prefix_length_->mutable_data_as(), num_prefix); DCHECK_EQ(ret, num_prefix); prefix_len_offset_ = 0; num_valid_values_ = num_prefix; @@ -3425,8 +3418,7 @@ class DeltaByteArrayDecoderImpl : public DecoderImpl, virtual public TypedDecode int64_t data_size = 0; const int32_t* prefix_len_ptr = - reinterpret_cast(buffered_prefix_length_->data()) + - prefix_len_offset_; + buffered_prefix_length_->data_as() + prefix_len_offset_; for (int i = 0; i < max_values; ++i) { if (prefix_len_ptr[i] == 0) { // We don't need to copy the suffix if the prefix length is 0. @@ -3578,7 +3570,7 @@ class ByteStreamSplitDecoder : public DecoderImpl, virtual public TypedDecodersize() < size) { PARQUET_ASSIGN_OR_THROW(decode_buffer_, ::arrow::AllocateBuffer(size)); } - return reinterpret_cast(decode_buffer_->mutable_data()); + return decode_buffer_->mutable_data_as(); } private: From b736c99cea9e6b86475e8f2ce264ede3262a237c Mon Sep 17 00:00:00 2001 From: David Li Date: Fri, 5 Jan 2024 12:54:42 -0500 Subject: [PATCH 137/570] GH-39468: [Java] Fix site build for docs (#39471) ### Rationale for this change Pin plugins we use for docs build. ### Are there any user-facing changes? No. 
* Closes: #39468 Authored-by: David Li Signed-off-by: David Li --- java/bom/pom.xml | 30 ++++++++++++++++++++++++++++++ java/maven/pom.xml | 37 +++++++++++++++++++++++++++++++++++++ java/performance/pom.xml | 4 ---- java/pom.xml | 30 ++++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+), 4 deletions(-) diff --git a/java/bom/pom.xml b/java/bom/pom.xml index 1f6f854f60013..5c2ed33dadddf 100644 --- a/java/bom/pom.xml +++ b/java/bom/pom.xml @@ -145,4 +145,34 @@ + + + + + org.apache.maven.plugins + maven-project-info-reports-plugin + 3.5.0 + + + org.apache.maven.plugins + maven-site-plugin + 3.7.1 + + + + + + + + org.apache.maven.plugins + maven-project-info-reports-plugin + 3.5.0 + + + org.apache.maven.plugins + maven-site-plugin + 3.7.1 + + + diff --git a/java/maven/pom.xml b/java/maven/pom.xml index 0923984c8e5e5..56f3c4c434f64 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -302,7 +302,44 @@ + + + org.apache.maven.plugins + maven-project-info-reports-plugin + + + org.apache.maven.plugins + maven-site-plugin + + + + + org.apache.maven.plugins + maven-project-info-reports-plugin + 3.5.0 + + + org.apache.maven.plugins + maven-site-plugin + 3.7.1 + + + + + + + org.apache.maven.plugins + maven-project-info-reports-plugin + 3.0.0 + + + org.apache.maven.plugins + maven-site-plugin + 3.7.1 + + + diff --git a/java/performance/pom.xml b/java/performance/pom.xml index 4d449af46b6b1..13300c2ac834f 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -201,10 +201,6 @@ maven-resources-plugin 3.3.1 - - maven-site-plugin - 3.3 - maven-source-plugin 2.2.1 diff --git a/java/pom.xml b/java/pom.xml index fae072018eb19..6b7192fd33efc 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -390,6 +390,16 @@ + + org.apache.maven.plugins + maven-project-info-reports-plugin + 3.0.0 + + + org.apache.maven.plugins + maven-site-plugin + 3.7.1 + @@ -572,6 +582,16 @@ module-info-compiler-maven-plugin ${project.version} + + org.apache.maven.plugins + maven-project-info-reports-plugin + 3.0.0 + + + org.apache.maven.plugins + maven-site-plugin + 3.7.1 + @@ -757,6 +777,16 @@ + + org.apache.maven.plugins + maven-project-info-reports-plugin + 3.0.0 + + + org.apache.maven.plugins + maven-site-plugin + 3.7.1 + From 38af25808e7826fb64265a78b2ed36b3882499f9 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Fri, 5 Jan 2024 17:57:06 +0000 Subject: [PATCH 138/570] GH-39048: [JS] Re-export existing type enums (#39473) This way we don't have to manually maintain the enums even though they should be pretty much constants. * Closes: #39048 --- js/src/enum.ts | 120 +++---------------------------------------------- 1 file changed, 7 insertions(+), 113 deletions(-) diff --git a/js/src/enum.ts b/js/src/enum.ts index 0eecc0c68b525..e4284e42774ad 100644 --- a/js/src/enum.ts +++ b/js/src/enum.ts @@ -15,119 +15,13 @@ // specific language governing permissions and limitations // under the License. -//// -// -// A few enums copied from `fb/Schema.ts` and `fb/Message.ts` because Webpack -// v4 doesn't seem to be able to tree-shake the rest of those exports. -// -// We will have to keep these enums in sync when we re-generate the flatbuffers -// code from the schemas. See js/DEVELOP.md for info on how to run flatbuffers -// code generation. -// -//// - -/** - * Logical types, vector layouts, and schemas - * - * @enum {number} - */ -export enum MetadataVersion { - /** - * 0.1.0 (October 2016). - */ - V1 = 0, - - /** - * 0.2.0 (February 2017). Non-backwards compatible with V1. 
- */ - V2 = 1, - - /** - * 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2. - */ - V3 = 2, - - /** - * >= 0.8.0 (December 2017). Non-backwards compatible with V3. - */ - V4 = 3, - - /** - * >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4 - * metadata and IPC messages). Implementations are recommended to provide a - * V4 compatibility mode with V5 format changes disabled. - * - * Incompatible changes between V4 and V5: - * - Union buffer layout has changed. In V5, Unions don't have a validity - * bitmap buffer. - */ - V5 = 4 -} - -/** - * @enum {number} - */ -export enum UnionMode { - Sparse = 0, - Dense = 1 -} - -/** - * @enum {number} - */ -export enum Precision { - HALF = 0, - SINGLE = 1, - DOUBLE = 2 -} - -/** - * @enum {number} - */ -export enum DateUnit { - DAY = 0, - MILLISECOND = 1 -} - -/** - * @enum {number} - */ -export enum TimeUnit { - SECOND = 0, - MILLISECOND = 1, - MICROSECOND = 2, - NANOSECOND = 3 -} - -/** - * @enum {number} - */ -export enum IntervalUnit { - YEAR_MONTH = 0, - DAY_TIME = 1, - MONTH_DAY_NANO = 2 -} - -/** - * ---------------------------------------------------------------------- - * The root Message type - * This union enables us to easily send different message types without - * redundant storage, and in the future we can easily add new message types. - * - * Arrow implementations do not need to implement all of the message types, - * which may include experimental metadata types. For maximum compatibility, - * it is best to send data using RecordBatch - * - * @enum {number} - */ -export enum MessageHeader { - NONE = 0, - Schema = 1, - DictionaryBatch = 2, - RecordBatch = 3, - Tensor = 4, - SparseTensor = 5 -} +export { MetadataVersion } from './fb/metadata-version.js'; +export { UnionMode } from './fb/union-mode.js'; +export { Precision } from './fb/precision.js'; +export { DateUnit } from './fb/date-unit.js'; +export { TimeUnit } from './fb/time-unit.js'; +export { IntervalUnit } from './fb/interval-unit.js'; +export { MessageHeader } from './fb/message-header.js'; /** * Main data type enumeration. From 9b931af14e5a710cba0aaa6b899e2ca696bfd785 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Fri, 5 Jan 2024 19:35:36 +0000 Subject: [PATCH 139/570] GH-39477: [JS] remove esModuleInterop (#39478) * Closes: #39477 Also removes a dependency for pad left since it's built in now: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/padStart. 
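In practice the change has two visible effects: with `esModuleInterop` off, default imports of CommonJS packages become namespace imports (e.g. `import * as commandLineArgs from 'command-line-args'`), and the `pad-left` and `randomatic` dependencies are replaced by the built-in `String.prototype.padStart` and a small local `randomString` test helper. A minimal sketch of both replacements is below; it is illustrative only, and `formatCell` is a hypothetical name rather than part of this patch.

```ts
// Sketch of the two dependency replacements in this patch (assumed shapes, not the exact code).

// pad-left's padLeft(value, width) becomes the built-in padStart:
export function formatCell(value: string, width: number): string {
  return value.padStart(width);
}

// randomatic('?', length, { chars }) becomes a local helper like js/test/random-string.ts:
export function randomString(
  length: number,
  chars = 'abcdefghijklmnopqrstuvwxyz0123456789_'
): string {
  let result = '';
  while (length--) {
    result += chars.charAt(Math.floor(Math.random() * chars.length));
  }
  return result;
}

// Example usage:
//   formatCell('42', 5)  -> '   42'
//   randomString(8)      -> e.g. 'k3v_9xq0'
```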
--- js/package.json | 5 +- js/src/bin/arrow2csv.ts | 9 ++- js/test/generate-test-data.ts | 5 +- js/test/random-string.ts | 43 ++++++++++++ js/test/unit/builders/utils.ts | 5 +- js/test/unit/ipc/helpers.ts | 8 +-- js/test/unit/ipc/reader/streams-node-tests.ts | 3 - js/test/unit/utils.ts | 2 +- js/tsconfig.json | 1 - js/tsconfig/tsconfig.base.json | 1 - js/yarn.lock | 68 +++++++++---------- 11 files changed, 89 insertions(+), 61 deletions(-) create mode 100644 js/test/random-string.ts diff --git a/js/package.json b/js/package.json index d72fdd3177016..eb24947ce78b8 100644 --- a/js/package.json +++ b/js/package.json @@ -56,12 +56,10 @@ "@types/command-line-args": "^5.2.1", "@types/command-line-usage": "^5.0.2", "@types/node": "^20.6.0", - "@types/pad-left": "^2.1.1", "command-line-args": "^5.2.1", "command-line-usage": "^7.0.1", "flatbuffers": "^23.5.26", "json-bignum": "^0.0.3", - "pad-left": "^2.1.0", "tslib": "^2.6.2" }, "devDependencies": { @@ -73,7 +71,7 @@ "@types/benchmark": "2.1.4", "@types/glob": "8.1.0", "@types/jest": "29.5.3", - "@types/randomatic": "3.1.3", + "@types/multistream": "4.1.3", "@typescript-eslint/eslint-plugin": "5.62.0", "@typescript-eslint/parser": "5.59.9", "async-done": "2.0.0", @@ -104,7 +102,6 @@ "memfs": "4.5.0", "mkdirp": "3.0.1", "multistream": "4.1.0", - "randomatic": "3.1.1", "regenerator-runtime": "0.14.0", "rollup": "4.3.0", "rxjs": "7.8.1", diff --git a/js/src/bin/arrow2csv.ts b/js/src/bin/arrow2csv.ts index 39db8c17497cd..4115f30099f03 100755 --- a/js/src/bin/arrow2csv.ts +++ b/js/src/bin/arrow2csv.ts @@ -23,9 +23,8 @@ import * as fs from 'fs'; import * as stream from 'stream'; import { Schema, RecordBatch, RecordBatchReader, AsyncByteQueue, util } from '../Arrow.js'; -import commandLineUsage from 'command-line-usage'; -import commandLineArgs from 'command-line-args'; -import padLeft from 'pad-left'; +import * as commandLineUsage from 'command-line-usage'; +import * as commandLineArgs from 'command-line-args'; // @ts-ignore import { parse as bignumJSONParse } from 'json-bignum'; @@ -190,11 +189,11 @@ function batchesToString(state: ToStringState, schema: Schema) { } function horizontalRule(maxColWidths: number[], hr = '', sep = ' | ') { - return ` ${padLeft('', maxColWidths.reduce((x, y) => x + y, -2 + maxColWidths.length * sep.length), hr)}`; + return ` ${''.padStart(maxColWidths.reduce((x, y) => x + y, -2 + maxColWidths.length * sep.length), hr)}`; } function formatRow(row: string[] = [], maxColWidths: number[] = [], sep = ' | ') { - return `${row.map((x, j) => padLeft(x, maxColWidths[j])).join(sep)}`; + return row.map((x, j) => x.padStart(maxColWidths[j])).join(sep); } function formatMetadataValue(value = '') { diff --git a/js/test/generate-test-data.ts b/js/test/generate-test-data.ts index be248ad2c6ed8..8e6e47de836eb 100644 --- a/js/test/generate-test-data.ts +++ b/js/test/generate-test-data.ts @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. 
-import randomatic from 'randomatic'; - import { makeData, Vector, Visitor, DataType, TypeMap, Table, Schema, Field, RecordBatch, @@ -43,6 +41,8 @@ import { util } from 'apache-arrow'; +import { randomString } from './random-string.js'; + type TKeys = Int8 | Int16 | Int32 | Uint8 | Uint16 | Uint32; interface TestDataVectorGenerator extends Visitor { @@ -650,7 +650,6 @@ type TypedArrayConstructor = const rand = Math.random.bind(Math); const randomBytes = (length: number) => fillRandom(Uint8Array, length); -const randomString = (length: number) => randomatic('?', length, { chars: `abcdefghijklmnopqrstuvwxyz0123456789_` }); const memoize = (fn: () => any) => ((x?: any) => () => x || (x = fn()))(); diff --git a/js/test/random-string.ts b/js/test/random-string.ts new file mode 100644 index 0000000000000..a70af451d4220 --- /dev/null +++ b/js/test/random-string.ts @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +export const LOWER = 'abcdefghijklmnopqrstuvwxyz'; +export const UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; +export const NUMBER = '0123456789'; +export const SPECIAL = '~!@#$%^&()_+-={}[];\',.'; + +export const ALL = LOWER + UPPER + NUMBER + SPECIAL; + +/** + * Generate random string of specified `length` for the given `pattern`. + * + * @param `pattern` The pattern to use for generating the random string. + * @param `length` The length of the string to generate. + * @param `options` + */ +export function randomString(length: number, characters: string = `${LOWER + NUMBER}_`) { + let result = ''; + + while (length--) { + result += characters.charAt(Math.floor(Math.random() * characters.length)); + } + return result; +} + + +10; + diff --git a/js/test/unit/builders/utils.ts b/js/test/unit/builders/utils.ts index fbd8eb49eee7e..db4e80d002778 100644 --- a/js/test/unit/builders/utils.ts +++ b/js/test/unit/builders/utils.ts @@ -20,9 +20,9 @@ import 'web-streams-polyfill'; import { from, fromDOMStream, toArray } from 'ix/asynciterable'; import { fromNodeStream } from 'ix/asynciterable/fromnodestream'; import 'ix/Ix.node'; -import randstr from 'randomatic'; import '../../jest-extensions.js'; +import { randomString } from '../../random-string.js'; import { Builder, makeBuilder, builderThroughIterable, DataType, util, Vector } from 'apache-arrow'; @@ -30,9 +30,6 @@ const rand = Math.random.bind(Math); const randnulls = (values: T[], n: TNull = null) => values.map((x) => Math.random() > 0.25 ? 
x : n) as (T | TNull)[]; export const randomBytes = (length: number) => fillRandom(Uint8Array, length); -export const randomString = ((opts) => (length: number) => - randstr('?', length, opts) -)({ chars: `abcdefghijklmnopqrstuvwxyz0123456789_` }); export const stringsNoNulls = (length = 20) => Array.from({ length }, (_) => randomString(1 + (Math.trunc(Math.random() * 19)))); export const timestamp32sNoNulls = (length = 20, now = Math.trunc(Date.now() / 86400000)) => diff --git a/js/test/unit/ipc/helpers.ts b/js/test/unit/ipc/helpers.ts index a09cd01799a5f..2a228aa7abf18 100644 --- a/js/test/unit/ipc/helpers.ts +++ b/js/test/unit/ipc/helpers.ts @@ -15,11 +15,8 @@ // specific language governing permissions and limitations // under the License. -import '../../jest-extensions.js'; - import * as fs from 'fs'; import { fs as memfs } from 'memfs'; -import randomatic from 'randomatic'; import { PassThrough, Readable } from 'stream'; import { @@ -30,6 +27,9 @@ import { Table } from 'apache-arrow'; +import '../../jest-extensions.js'; +import { LOWER, NUMBER, randomString } from '../../random-string.js'; + export abstract class ArrowIOTestHelper { constructor(public table: Table) { } @@ -40,7 +40,7 @@ export abstract class ArrowIOTestHelper { protected abstract writer(table: Table): RecordBatchWriter; protected async filepath(table: Table): Promise { - const path = `/${randomatic('a0', 20)}.arrow`; + const path = `/${randomString(20, LOWER + NUMBER)}.arrow`; const data = await this.writer(table).toUint8Array(); await memfs.promises.writeFile(path, data); return path; diff --git a/js/test/unit/ipc/reader/streams-node-tests.ts b/js/test/unit/ipc/reader/streams-node-tests.ts index 24dd92fb5712a..2e3f08c4e7837 100644 --- a/js/test/unit/ipc/reader/streams-node-tests.ts +++ b/js/test/unit/ipc/reader/streams-node-tests.ts @@ -100,7 +100,6 @@ import { } it('readAll() should pipe to separate NodeJS WritableStreams', async () => { - // @ts-ignore const { default: MultiStream } = await import('multistream'); const { PassThrough } = await import('stream'); @@ -138,7 +137,6 @@ import { }); it('should not close the underlying NodeJS ReadableStream when reading multiple tables to completion', async () => { - // @ts-ignore const { default: MultiStream } = await import('multistream'); expect.hasAssertions(); @@ -168,7 +166,6 @@ import { }); it('should close the underlying NodeJS ReadableStream when reading multiple tables and we break early', async () => { - // @ts-ignore const { default: MultiStream } = await import('multistream'); expect.hasAssertions(); diff --git a/js/test/unit/utils.ts b/js/test/unit/utils.ts index c57de487f9edb..8f0a99c4a8616 100644 --- a/js/test/unit/utils.ts +++ b/js/test/unit/utils.ts @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-export function arange(arr: T, n = arr.length) { +export function arange(arr: T, n = arr.length) { for (let i = -1; ++i < n; arr[i] = i) { } return arr; } diff --git a/js/tsconfig.json b/js/tsconfig.json index abdd1815a0d98..96f457b50fb82 100644 --- a/js/tsconfig.json +++ b/js/tsconfig.json @@ -9,7 +9,6 @@ "module": "ESNext", "isolatedModules": true, "noEmit": true, - "esModuleInterop": true, "baseUrl": "./", "rootDir": "./", "paths": { diff --git a/js/tsconfig/tsconfig.base.json b/js/tsconfig/tsconfig.base.json index 0d7fefd90949f..874ea9f52f0d8 100644 --- a/js/tsconfig/tsconfig.base.json +++ b/js/tsconfig/tsconfig.base.json @@ -12,7 +12,6 @@ /* Basic stuff */ "moduleResolution": "Node", "lib": ["DOM", "ESNext", "ESNext.AsyncIterable"], - "esModuleInterop": true, /* Control what is emitted */ "declaration": true, diff --git a/js/yarn.lock b/js/yarn.lock index bf22cce197c6b..cef6357e02e44 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -65,6 +65,14 @@ "@babel/highlight" "^7.22.13" chalk "^2.4.2" +"@babel/code-frame@^7.22.5": + version "7.23.5" + resolved "https://registry.yarnpkg.com/@babel/code-frame/-/code-frame-7.23.5.tgz#9009b69a8c602293476ad598ff53e4562e15c244" + integrity sha512-CgH3s1a96LipHCmSUmYFPwY7MNx8C3avkq7i4Wl3cfa662ldtUe4VM1TPXX70pfmrlWTb6jLqTYrZyT2ZTJBgA== + dependencies: + "@babel/highlight" "^7.23.4" + chalk "^2.4.2" + "@babel/compat-data@^7.22.9": version "7.23.2" resolved "https://registry.yarnpkg.com/@babel/compat-data/-/compat-data-7.23.2.tgz#6a12ced93455827037bfb5ed8492820d60fc32cc" @@ -217,6 +225,15 @@ chalk "^2.4.2" js-tokens "^4.0.0" +"@babel/highlight@^7.23.4": + version "7.23.4" + resolved "https://registry.yarnpkg.com/@babel/highlight/-/highlight-7.23.4.tgz#edaadf4d8232e1a961432db785091207ead0621b" + integrity sha512-acGdbYSfp2WheJoJm/EBBBLh/ID8KDc64ISZ9DYtBmC8/Q204PZJLHyzeB5qMzJ5trcOkybd78M4x2KWsUq++A== + dependencies: + "@babel/helper-validator-identifier" "^7.22.20" + chalk "^2.4.2" + js-tokens "^4.0.0" + "@babel/parser@^7.1.0", "@babel/parser@^7.14.7", "@babel/parser@^7.20.7", "@babel/parser@^7.22.16": version "7.22.16" resolved "https://registry.npmjs.org/@babel/parser/-/parser-7.22.16.tgz#180aead7f247305cce6551bea2720934e2fa2c95" @@ -227,6 +244,11 @@ resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.23.0.tgz#da950e622420bf96ca0d0f2909cdddac3acd8719" integrity sha512-vvPKKdMemU85V9WE/l5wZEmImpCtLqbnTvqDS2U1fJ96KrxoW7KrXhNsNCblQlg8Ck4b85yxdTyelsMUgFUXiw== +"@babel/parser@^7.22.5": + version "7.23.6" + resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.23.6.tgz#ba1c9e512bda72a47e285ae42aff9d2a635a9e3b" + integrity sha512-Z2uID7YJ7oNvAI20O9X0bblw7Qqs8Q2hFy0R9tAfnfLkp5MW0UH9eUvnDSnFwKZ0AvgS1ucqR4KzvVHgnke1VQ== + "@babel/plugin-syntax-async-generators@^7.8.4": version "7.8.4" resolved "https://registry.npmjs.org/@babel/plugin-syntax-async-generators/-/plugin-syntax-async-generators-7.8.4.tgz#a983fb1aeb2ec3f6ed042a210f640e90e786fe0d" @@ -339,9 +361,9 @@ resolved "https://registry.yarnpkg.com/@babel/template/-/template-7.22.5.tgz#0c8c4d944509875849bd0344ff0050756eefc6ec" integrity sha512-X7yV7eiwAxdj9k94NEylvbVHLiVG1nvzCV2EAowhxLTwODV1jl9UzZ48leOC0sH7OnuHrIkllaBgneUykIcZaw== dependencies: - "@babel/code-frame" "^7.22.13" - "@babel/parser" "^7.22.15" - "@babel/types" "^7.22.15" + "@babel/code-frame" "^7.22.5" + "@babel/parser" "^7.22.5" + "@babel/types" "^7.22.5" "@babel/traverse@^7.22.15", "@babel/traverse@^7.22.19": version "7.23.2" @@ -1342,6 +1364,13 @@ resolved 
"https://registry.npmjs.org/@types/minimist/-/minimist-1.2.2.tgz#ee771e2ba4b3dc5b372935d549fd9617bf345b8c" integrity sha512-jhuKLIRrhvCPLqwPcx6INqmKeiA5EWrsCOPhrlFSrbrmU4ZMPjj5Ul/oLCMDO98XRUIwVm78xICz4EPCektzeQ== +"@types/multistream@4.1.3": + version "4.1.3" + resolved "https://registry.yarnpkg.com/@types/multistream/-/multistream-4.1.3.tgz#972e3666502128dc273ef15c86b2e533e373ece4" + integrity sha512-t57vmDEJOZuC0M3IrZYfCd9wolTcr3ZTCGk1iwHNosvgBX+7/SMvCGcR8wP9lidpelBZQ12crSuINOxkk0azPA== + dependencies: + "@types/node" "*" + "@types/node@*", "@types/node@^20.6.0": version "20.8.10" resolved "https://registry.yarnpkg.com/@types/node/-/node-20.8.10.tgz#a5448b895c753ae929c26ce85cab557c6d4a365e" @@ -1359,16 +1388,6 @@ resolved "https://registry.npmjs.org/@types/normalize-package-data/-/normalize-package-data-2.4.1.tgz#d3357479a0fdfdd5907fe67e17e0a85c906e1301" integrity sha512-Gj7cI7z+98M282Tqmp2K5EIsoouUEzbBJhQQzDE3jSIRk6r9gsz0oUokqIUR4u1R3dMHo0pDHM7sNOHyhulypw== -"@types/pad-left@^2.1.1": - version "2.1.3" - resolved "https://registry.yarnpkg.com/@types/pad-left/-/pad-left-2.1.3.tgz#f636e62154e95bf6660439c51fe828da918124b2" - integrity sha512-fayws3T8lGvIY3UEtqFHKSH6FS1Lepo6kd3ZTgdj8rsVIIwzr9MZJt1ZP9UGu+cdAZsJiG2d5iYxyhRXwtUB5A== - -"@types/randomatic@3.1.3": - version "3.1.3" - resolved "https://registry.npmjs.org/@types/randomatic/-/randomatic-3.1.3.tgz#5475c29e82cb8dab6c94e55e77306c8eedab2d1f" - integrity sha512-UlYMg/XxN+YMh6vAiB879yh2bhaTOU0DB1g4NGIhzlaiSf22rAVKIGTvH8HjCXu+wfFvjAWHuPG5waN4btEubw== - "@types/resolve@1.20.2": version "1.20.2" resolved "https://registry.npmjs.org/@types/resolve/-/resolve-1.20.2.tgz#97d26e00cd4a0423b4af620abecf3e6f442b7975" @@ -5136,11 +5155,6 @@ matchdep@^2.0.0: resolve "^1.4.0" stack-trace "0.0.10" -math-random@^1.0.1: - version "1.0.4" - resolved "https://registry.npmjs.org/math-random/-/math-random-1.0.4.tgz#5dd6943c938548267016d4e34f057583080c514c" - integrity sha512-rUxjysqif/BZQH2yhd5Aaq7vXMSx9NdEsQcyA07uEzIvxgI7zIr33gGsh+RU0/XjmQpCW7RsVof1vlkvQVCK5A== - memfs@4.5.0: version "4.5.0" resolved "https://registry.yarnpkg.com/memfs/-/memfs-4.5.0.tgz#03082709987760022275e0d3bc0f24545b7fe279" @@ -5584,13 +5598,6 @@ p-try@^2.0.0: resolved "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz#cb2868540e313d61de58fafbe35ce9004d5540e6" integrity sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ== -pad-left@^2.1.0: - version "2.1.0" - resolved "https://registry.npmjs.org/pad-left/-/pad-left-2.1.0.tgz#16e6a3b2d44a8e138cb0838cc7cb403a4fc9e994" - integrity sha512-HJxs9K9AztdIQIAIa/OIazRAUW/L6B9hbQDxO4X07roW3eo9XqZc2ur9bn1StH9CnbbI9EgvejHQX7CBpCF1QA== - dependencies: - repeat-string "^1.5.4" - parent-module@^1.0.0: version "1.0.1" resolved "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz#691d2709e78c79fae3a156622452d00762caaaa2" @@ -5875,15 +5882,6 @@ quick-lru@^5.1.1: resolved "https://registry.npmjs.org/quick-lru/-/quick-lru-5.1.1.tgz#366493e6b3e42a3a6885e2e99d18f80fb7a8c932" integrity sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA== -randomatic@3.1.1: - version "3.1.1" - resolved "https://registry.npmjs.org/randomatic/-/randomatic-3.1.1.tgz#b776efc59375984e36c537b2f51a1f0aff0da1ed" - integrity sha512-TuDE5KxZ0J461RVjrJZCJc+J+zCkTb1MbH9AQUq68sMhOMcy9jLcb3BrZKgp9q9Ncltdg4QVqWrH02W2EFFVYw== - dependencies: - is-number "^4.0.0" - kind-of "^6.0.0" - math-random "^1.0.1" - randombytes@^2.1.0: version "2.1.0" resolved 
"https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz#df6f84372f0270dc65cdf6291349ab7a473d4f2a" @@ -6049,7 +6047,7 @@ repeat-element@^1.1.2: resolved "https://registry.npmjs.org/repeat-element/-/repeat-element-1.1.4.tgz#be681520847ab58c7568ac75fbfad28ed42d39e9" integrity sha512-LFiNfRcSu7KK3evMyYOuCzv3L10TW7yC1G2/+StMjK8Y6Vqd2MG7r/Qjw4ghtuCOjFvlnms/iMmLqpvW/ES/WQ== -repeat-string@^1.5.4, repeat-string@^1.6.1: +repeat-string@^1.6.1: version "1.6.1" resolved "https://registry.npmjs.org/repeat-string/-/repeat-string-1.6.1.tgz#8dcae470e1c88abc2d600fff4a776286da75e637" integrity sha512-PV0dzCYDNfRi1jCDbJzpW7jNNDRuCOG/jI5ctQcGKt/clZD+YcPS3yIlWuTJMmESC8aevCFmWJy5wjAFgNqN6w== From afb40a9f5a33802897e1d5bae8305c81da7beee1 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Fri, 5 Jan 2024 19:36:43 +0000 Subject: [PATCH 140/570] GH-39259: [JS] Remove getByteLength (#39260) * Closes: #39259 --- js/src/recordbatch.ts | 9 -- js/src/table.ts | 9 -- js/src/vector.ts | 17 +--- js/src/visitor/bytelength.ts | 164 ----------------------------------- js/test/unit/table-tests.ts | 7 -- 5 files changed, 3 insertions(+), 203 deletions(-) delete mode 100644 js/src/visitor/bytelength.ts diff --git a/js/src/recordbatch.ts b/js/src/recordbatch.ts index 1ea7c52ccf310..b9061c8b9bb04 100644 --- a/js/src/recordbatch.ts +++ b/js/src/recordbatch.ts @@ -25,7 +25,6 @@ import { instance as getVisitor } from './visitor/get.js'; import { instance as setVisitor } from './visitor/set.js'; import { instance as indexOfVisitor } from './visitor/indexof.js'; import { instance as iteratorVisitor } from './visitor/iterator.js'; -import { instance as byteLengthVisitor } from './visitor/bytelength.js'; /** @ignore */ export interface RecordBatch { @@ -150,14 +149,6 @@ export class RecordBatch { return indexOfVisitor.visit(this.data, element, offset); } - /** - * Get the size (in bytes) of a row by index. - * @param index The row index for which to compute the byteLength. - */ - public getByteLength(index: number): number { - return byteLengthVisitor.visit(this.data, index); - } - /** * Iterator for rows in this RecordBatch. */ diff --git a/js/src/table.ts b/js/src/table.ts index e719b7ca9d313..d7a6617530a8e 100644 --- a/js/src/table.ts +++ b/js/src/table.ts @@ -38,7 +38,6 @@ import { instance as getVisitor } from './visitor/get.js'; import { instance as setVisitor } from './visitor/set.js'; import { instance as indexOfVisitor } from './visitor/indexof.js'; import { instance as iteratorVisitor } from './visitor/iterator.js'; -import { instance as byteLengthVisitor } from './visitor/bytelength.js'; import { DataProps } from './data.js'; import { clampRange } from './util/vector.js'; @@ -215,13 +214,6 @@ export class Table { // @ts-ignore public indexOf(element: Struct['TValue'], offset?: number): number { return -1; } - /** - * Get the size in bytes of an element by index. - * @param index The index at which to get the byteLength. - */ - // @ts-ignore - public getByteLength(index: number): number { return 0; } - /** * Iterator for rows in this Table. 
*/ @@ -390,7 +382,6 @@ export class Table { (proto as any)['get'] = wrapChunkedCall1(getVisitor.getVisitFn(Type.Struct)); (proto as any)['set'] = wrapChunkedCall2(setVisitor.getVisitFn(Type.Struct)); (proto as any)['indexOf'] = wrapChunkedIndexOf(indexOfVisitor.getVisitFn(Type.Struct)); - (proto as any)['getByteLength'] = wrapChunkedCall1(byteLengthVisitor.getVisitFn(Type.Struct)); return 'Table'; })(Table.prototype); } diff --git a/js/src/vector.ts b/js/src/vector.ts index 8b94b14e3fff7..a7c103bc326ee 100644 --- a/js/src/vector.ts +++ b/js/src/vector.ts @@ -36,7 +36,6 @@ import { instance as getVisitor } from './visitor/get.js'; import { instance as setVisitor } from './visitor/set.js'; import { instance as indexOfVisitor } from './visitor/indexof.js'; import { instance as iteratorVisitor } from './visitor/iterator.js'; -import { instance as byteLengthVisitor } from './visitor/bytelength.js'; // @ts-ignore import type { vectorFromArray } from './factories.js'; @@ -56,7 +55,7 @@ export interface Vector { [Symbol.isConcatSpreadable]: true; } -const visitorsByTypeId = {} as { [typeId: number]: { get: any; set: any; indexOf: any; byteLength: any } }; +const visitorsByTypeId = {} as { [typeId: number]: { get: any; set: any; indexOf: any } }; const vectorPrototypesByTypeId = {} as { [typeId: number]: any }; /** @@ -76,14 +75,13 @@ export class Vector { case 0: this._offsets = [0]; break; case 1: { // special case for unchunked vectors - const { get, set, indexOf, byteLength } = visitorsByTypeId[type.typeId]; + const { get, set, indexOf } = visitorsByTypeId[type.typeId]; const unchunkedData = data[0]; this.isValid = (index: number) => isChunkedValid(unchunkedData, index); this.get = (index: number) => get(unchunkedData, index); this.set = (index: number, value: T) => set(unchunkedData, index, value); this.indexOf = (index: number) => indexOf(unchunkedData, index); - this.getByteLength = (index: number) => byteLength(unchunkedData, index); this._offsets = [0, unchunkedData.length]; break; } @@ -200,13 +198,6 @@ export class Vector { return this.indexOf(element, offset) > -1; } - /** - * Get the size in bytes of an element by index. - * @param index The index at which to get the byteLength. - */ - // @ts-ignore - public getByteLength(index: number): number { return 0; } - /** * Iterator for the Vector's elements. */ @@ -366,15 +357,13 @@ export class Vector { const get = getVisitor.getVisitFnByTypeId(typeId); const set = setVisitor.getVisitFnByTypeId(typeId); const indexOf = indexOfVisitor.getVisitFnByTypeId(typeId); - const byteLength = byteLengthVisitor.getVisitFnByTypeId(typeId); - visitorsByTypeId[typeId] = { get, set, indexOf, byteLength }; + visitorsByTypeId[typeId] = { get, set, indexOf }; vectorPrototypesByTypeId[typeId] = Object.create(proto, { ['isValid']: { value: wrapChunkedCall1(isChunkedValid) }, ['get']: { value: wrapChunkedCall1(getVisitor.getVisitFnByTypeId(typeId)) }, ['set']: { value: wrapChunkedCall2(setVisitor.getVisitFnByTypeId(typeId)) }, ['indexOf']: { value: wrapChunkedIndexOf(indexOfVisitor.getVisitFnByTypeId(typeId)) }, - ['getByteLength']: { value: wrapChunkedCall1(byteLengthVisitor.getVisitFnByTypeId(typeId)) }, }); } diff --git a/js/src/visitor/bytelength.ts b/js/src/visitor/bytelength.ts deleted file mode 100644 index 43399b2571fe2..0000000000000 --- a/js/src/visitor/bytelength.ts +++ /dev/null @@ -1,164 +0,0 @@ -/* istanbul ignore file */ - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -/* eslint-disable unicorn/no-array-callback-reference */ - -import { Data } from '../data.js'; -import { Visitor } from '../visitor.js'; -import { TypeToDataType } from '../interfaces.js'; -import { Type, TimeUnit, UnionMode } from '../enum.js'; -import { - DataType, Dictionary, - Float, Int, Date_, Interval, Time, Timestamp, Duration, - Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, - List, FixedSizeList, Map_, Struct, Union, DenseUnion, SparseUnion, -} from '../type.js'; -import { bigIntToNumber } from '../util/bigint.js'; - -/** @ignore */ const sum = (x: number, y: number) => x + y; - -/** @ignore */ -export interface GetByteLengthVisitor extends Visitor { - visit(node: Data, index: number): number; - visitMany(nodes: Data[], index: number[]): number[]; - getVisitFn(node: Data | T): (data: Data, index: number) => number; - getVisitFn(node: T): (data: Data>, index: number) => number; - visitBinary(data: Data, index: number): number; - visitLargeBinary(data: Data, index: number): number; - visitUtf8(data: Data, index: number): number; - visitLargeUtf8(data: Data, index: number): number; - visitList(data: Data, index: number): number; - visitDenseUnion(data: Data, index: number): number; - visitSparseUnion(data: Data, index: number): number; - visitFixedSizeList(data: Data, index: number): number; -} - -/** @ignore */ -export class GetByteLengthVisitor extends Visitor { - public visitNull(____: Data, _: number) { - return 0; - } - public visitInt(data: Data, _: number) { - return data.type.bitWidth / 8; - } - public visitFloat(data: Data, _: number) { - return data.type.ArrayType.BYTES_PER_ELEMENT; - } - public visitBool(____: Data, _: number) { - return 1 / 8; - } - public visitDecimal(data: Data, _: number) { - return data.type.bitWidth / 8; - } - public visitDate(data: Data, _: number) { - return (data.type.unit + 1) * 4; - } - public visitTime(data: Data